library(skimr)
# Import the data.
# Empty fields are read as NA. stringsAsFactors is set explicitly because its
# default changed from TRUE to FALSE in R 4.0.0; the factor-based summaries
# shown below rely on text columns being read as factors.
df <- read.csv(
  "merged_LTC_odhf_quality.csv",
  header = TRUE,
  na.strings = "",
  stringsAsFactors = TRUE
)
skim(df)
| Name | df |
| Number of rows | 615 |
| Number of columns | 38 |
| _______________________ | |
| Column type frequency: | |
| factor | 16 |
| numeric | 22 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| name | 0 | 1.00 | FALSE | 615 | AFT: 1, ALB: 1, ALE: 1, ALG: 1 |
| cleaned_name | 0 | 1.00 | FALSE | 615 | aft: 1, alb: 1, ale: 1, alg: 1 |
| address | 0 | 1.00 | FALSE | 614 | 180: 2, 1 B: 1, 1 M: 1, 1 N: 1 |
| LHIN | 0 | 1.00 | FALSE | 14 | Ham: 85, Sou: 76, Cen: 68, Cha: 57 |
| home_type | 0 | 1.00 | FALSE | 3 | For: 351, Non: 164, Mun: 100 |
| city | 0 | 1.00 | FALSE | 263 | Tor: 37, Sca: 20, Ham: 16, Ott: 16 |
| postal_code | 0 | 1.00 | FALSE | 594 | K0: 4, K0: 2, K0: 2, K0: 2 |
| short_stay | 0 | 1.00 | FALSE | 2 | No: 412, Yes: 203 |
| residents_council | 0 | 1.00 | FALSE | 2 | Yes: 606, No: 9 |
| family_council | 0 | 1.00 | FALSE | 2 | Yes: 510, No: 105 |
| accreditation | 0 | 1.00 | FALSE | 2 | Yes: 511, No: 104 |
| confirmed_resident_cases | 486 | 0.21 | FALSE | 38 | <5: 48, 0: 36, 25: 3, 28: 3 |
| resident_deaths | 323 | 0.47 | FALSE | 36 | 0: 195, <5: 25, 11: 9, 18: 6 |
| confirmed_staff_cases | 486 | 0.21 | FALSE | 30 | <5: 53, 0: 24, 10: 4, 22: 4 |
| status | 323 | 0.47 | FALSE | 2 | Ina: 163, Act: 129 |
| CSDname | 1 | 1.00 | FALSE | 208 | Tor: 85, Ott: 27, Ham: 26, Lon: 15 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| total_inspections | 0 | 1 | 37.68 | 21.95 | 9.00 | 22.00 | 32.00 | 47.00 | 172.00 | ▇▃▁▁▁ |
| X5y_inspections | 0 | 1 | 19.82 | 11.95 | 5.00 | 12.00 | 17.00 | 24.00 | 86.00 | ▇▃▁▁▁ |
| X2y_inspections | 0 | 1 | 9.20 | 5.59 | 1.00 | 5.00 | 8.00 | 12.00 | 44.00 | ▇▅▁▁▁ |
| total_complaints | 0 | 1 | 14.70 | 11.64 | 0.00 | 7.00 | 12.00 | 20.00 | 94.00 | ▇▂▁▁▁ |
| X5y_complaints | 0 | 1 | 6.60 | 5.93 | 0.00 | 2.00 | 5.00 | 9.00 | 46.00 | ▇▂▁▁▁ |
| X2y_complaints | 0 | 1 | 3.20 | 2.86 | 0.00 | 1.00 | 3.00 | 5.00 | 20.00 | ▇▂▁▁▁ |
| total_critical | 0 | 1 | 12.61 | 9.17 | 0.00 | 6.50 | 11.00 | 16.00 | 62.00 | ▇▃▁▁▁ |
| X5y_critical | 0 | 1 | 6.54 | 4.82 | 0.00 | 3.00 | 5.00 | 8.00 | 40.00 | ▇▂▁▁▁ |
| X2y_critical | 0 | 1 | 3.98 | 2.54 | 0.00 | 2.00 | 3.00 | 5.00 | 26.00 | ▇▂▁▁▁ |
| total_withOrders | 0 | 1 | 6.88 | 6.17 | 0.00 | 3.00 | 5.00 | 9.00 | 43.00 | ▇▂▁▁▁ |
| X5y_withOrders | 0 | 1 | 4.41 | 4.35 | 0.00 | 2.00 | 3.00 | 6.00 | 34.00 | ▇▂▁▁▁ |
| X2y_withOrders | 0 | 1 | 1.85 | 2.20 | 0.00 | 0.00 | 1.00 | 3.00 | 16.00 | ▇▁▁▁▁ |
| antipsychotic_percent | 0 | 1 | 18.47 | 7.74 | 0.00 | 13.45 | 18.00 | 23.00 | 62.80 | ▃▇▂▁▁ |
| depression_percent | 0 | 1 | 23.40 | 10.09 | 1.50 | 15.25 | 23.40 | 31.40 | 48.40 | ▃▇▇▇▂ |
| falls_percent | 0 | 1 | 16.49 | 4.83 | 1.80 | 13.35 | 16.40 | 19.70 | 33.20 | ▁▅▇▂▁ |
| pressure_ulcers_percent | 0 | 1 | 2.74 | 1.53 | 0.00 | 1.60 | 2.60 | 3.60 | 11.00 | ▇▇▂▁▁ |
| pain_percent | 0 | 1 | 5.67 | 5.77 | 0.00 | 1.95 | 3.90 | 7.50 | 48.00 | ▇▂▁▁▁ |
| restraints_percent | 0 | 1 | 4.21 | 5.75 | 0.00 | 0.40 | 2.20 | 5.80 | 42.00 | ▇▁▁▁▁ |
| number_beds | 0 | 1 | 127.27 | 73.83 | 12.00 | 69.00 | 120.00 | 160.00 | 543.00 | ▇▇▁▁▁ |
| CSDuid | 2 | 1 | 3527707.51 | 14148.84 | 3501005.00 | 3520005.00 | 3525005.00 | 3539005.00 | 3560042.00 | ▃▇▅▅▂ |
| latitude | 1 | 1 | 44.20 | 1.47 | 42.03 | 43.32 | 43.76 | 44.55 | 51.01 | ▇▅▁▁▁ |
| longitude | 1 | 1 | -79.62 | 6.99 | -94.57 | -80.75 | -79.63 | -79.17 | 79.63 | ▇▁▁▁▁ |
# Flag each home's outbreak status: rows with a missing `status` are
# labelled "no", every other row "yes"
df$outbreak <- ifelse(is.na(df$status), "no", "yes")
# List the distinct LHIN values present in the data
unique(df$LHIN)
## [1] Hamilton Niagara Haldimand Brant (Hnhb)
## [2] North West
## [3] Erie St. Clair
## [4] Champlain
## [5] Toronto Central
## [6] Waterloo Wellington
## [7] North Simcoe Muskoka
## [8] Central East
## [9] North East
## [10] South East
## [11] South West
## [12] Central
## [13] Mississauga Halton
## [14] Central West
## 14 Levels: Central Central East Central West Champlain ... Waterloo Wellington
# Collapse the 14 LHINs into five broader regions, stored in a new `region`
# column
df$region[df$LHIN %in% c("North West", "North East")] <- "North"
df$region[df$LHIN %in% "Toronto Central"] <- "Toronto"
df$region[df$LHIN %in% c("Champlain", "Central East", "South East")] <- "East"
df$region[df$LHIN %in% c(
  "Erie St. Clair",
  "Hamilton Niagara Haldimand Brant (Hnhb)",
  "South West",
  "Waterloo Wellington"
)] <- "West"
df$region[df$LHIN %in% c(
  "Mississauga Halton", "Central West", "Central", "North Simcoe Muskoka"
)] <- "Central"
# complaints + critical, for each of the total / X5y / X2y counts
df$total_cc <- with(df, total_complaints + total_critical)
df$X5y_cc <- with(df, X5y_complaints + X5y_critical)
df$X2y_cc <- with(df, X2y_complaints + X2y_critical)
# complaints + critical + withOrders
df$total_ccw <- with(df, total_critical + total_complaints + total_withOrders)
df$X5y_ccw <- with(df, X5y_complaints + X5y_critical + X5y_withOrders)
df$X2y_ccw <- with(df, X2y_complaints + X2y_critical + X2y_withOrders)
# Inspections that were not complaints: inspections - complaints
df$total_noncomplaints <- with(df, total_inspections - total_complaints)
df$X5y_noncomplaints <- with(df, X5y_inspections - X5y_complaints)
df$X2y_noncomplaints <- with(df, X2y_inspections - X2y_complaints)
# Convert the two derived label columns (outbreak, region) to factors
for (col_name in c("outbreak", "region")) {
  df[[col_name]] <- as.factor(df[[col_name]])
}
# Store name, address and place-name columns as character
listofcol <- list(
  "name", "cleaned_name", "address", "city", "postal_code", "CSDname"
)
for (col_name in listofcol) {
  df[[col_name]] <- as.character(df[[col_name]])
}
skim(df)
| Name | df |
| Number of rows | 615 |
| Number of columns | 49 |
| _______________________ | |
| Column type frequency: | |
| character | 6 |
| factor | 12 |
| numeric | 31 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| name | 0 | 1 | 7 | 84 | 0 | 615 | 0 |
| cleaned_name | 0 | 1 | 7 | 84 | 0 | 615 | 0 |
| address | 0 | 1 | 8 | 61 | 0 | 614 | 0 |
| city | 0 | 1 | 3 | 19 | 0 | 263 | 0 |
| postal_code | 0 | 1 | 7 | 7 | 0 | 594 | 615 |
| CSDname | 1 | 1 | 3 | 39 | 0 | 208 | 0 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| LHIN | 0 | 1.00 | FALSE | 14 | Ham: 85, Sou: 76, Cen: 68, Cha: 57 |
| home_type | 0 | 1.00 | FALSE | 3 | For: 351, Non: 164, Mun: 100 |
| short_stay | 0 | 1.00 | FALSE | 2 | No: 412, Yes: 203 |
| residents_council | 0 | 1.00 | FALSE | 2 | Yes: 606, No: 9 |
| family_council | 0 | 1.00 | FALSE | 2 | Yes: 510, No: 105 |
| accreditation | 0 | 1.00 | FALSE | 2 | Yes: 511, No: 104 |
| confirmed_resident_cases | 486 | 0.21 | FALSE | 38 | <5: 48, 0: 36, 25: 3, 28: 3 |
| resident_deaths | 323 | 0.47 | FALSE | 36 | 0: 195, <5: 25, 11: 9, 18: 6 |
| confirmed_staff_cases | 486 | 0.21 | FALSE | 30 | <5: 53, 0: 24, 10: 4, 22: 4 |
| status | 323 | 0.47 | FALSE | 2 | Ina: 163, Act: 129 |
| outbreak | 0 | 1.00 | FALSE | 2 | no: 323, yes: 292 |
| region | 0 | 1.00 | FALSE | 5 | Wes: 233, Eas: 161, Cen: 122, Nor: 63 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| total_inspections | 0 | 1 | 37.68 | 21.95 | 9.00 | 22.00 | 32.00 | 47.00 | 172.00 | ▇▃▁▁▁ |
| X5y_inspections | 0 | 1 | 19.82 | 11.95 | 5.00 | 12.00 | 17.00 | 24.00 | 86.00 | ▇▃▁▁▁ |
| X2y_inspections | 0 | 1 | 9.20 | 5.59 | 1.00 | 5.00 | 8.00 | 12.00 | 44.00 | ▇▅▁▁▁ |
| total_complaints | 0 | 1 | 14.70 | 11.64 | 0.00 | 7.00 | 12.00 | 20.00 | 94.00 | ▇▂▁▁▁ |
| X5y_complaints | 0 | 1 | 6.60 | 5.93 | 0.00 | 2.00 | 5.00 | 9.00 | 46.00 | ▇▂▁▁▁ |
| X2y_complaints | 0 | 1 | 3.20 | 2.86 | 0.00 | 1.00 | 3.00 | 5.00 | 20.00 | ▇▂▁▁▁ |
| total_critical | 0 | 1 | 12.61 | 9.17 | 0.00 | 6.50 | 11.00 | 16.00 | 62.00 | ▇▃▁▁▁ |
| X5y_critical | 0 | 1 | 6.54 | 4.82 | 0.00 | 3.00 | 5.00 | 8.00 | 40.00 | ▇▂▁▁▁ |
| X2y_critical | 0 | 1 | 3.98 | 2.54 | 0.00 | 2.00 | 3.00 | 5.00 | 26.00 | ▇▂▁▁▁ |
| total_withOrders | 0 | 1 | 6.88 | 6.17 | 0.00 | 3.00 | 5.00 | 9.00 | 43.00 | ▇▂▁▁▁ |
| X5y_withOrders | 0 | 1 | 4.41 | 4.35 | 0.00 | 2.00 | 3.00 | 6.00 | 34.00 | ▇▂▁▁▁ |
| X2y_withOrders | 0 | 1 | 1.85 | 2.20 | 0.00 | 0.00 | 1.00 | 3.00 | 16.00 | ▇▁▁▁▁ |
| antipsychotic_percent | 0 | 1 | 18.47 | 7.74 | 0.00 | 13.45 | 18.00 | 23.00 | 62.80 | ▃▇▂▁▁ |
| depression_percent | 0 | 1 | 23.40 | 10.09 | 1.50 | 15.25 | 23.40 | 31.40 | 48.40 | ▃▇▇▇▂ |
| falls_percent | 0 | 1 | 16.49 | 4.83 | 1.80 | 13.35 | 16.40 | 19.70 | 33.20 | ▁▅▇▂▁ |
| pressure_ulcers_percent | 0 | 1 | 2.74 | 1.53 | 0.00 | 1.60 | 2.60 | 3.60 | 11.00 | ▇▇▂▁▁ |
| pain_percent | 0 | 1 | 5.67 | 5.77 | 0.00 | 1.95 | 3.90 | 7.50 | 48.00 | ▇▂▁▁▁ |
| restraints_percent | 0 | 1 | 4.21 | 5.75 | 0.00 | 0.40 | 2.20 | 5.80 | 42.00 | ▇▁▁▁▁ |
| number_beds | 0 | 1 | 127.27 | 73.83 | 12.00 | 69.00 | 120.00 | 160.00 | 543.00 | ▇▇▁▁▁ |
| CSDuid | 2 | 1 | 3527707.51 | 14148.84 | 3501005.00 | 3520005.00 | 3525005.00 | 3539005.00 | 3560042.00 | ▃▇▅▅▂ |
| latitude | 1 | 1 | 44.20 | 1.47 | 42.03 | 43.32 | 43.76 | 44.55 | 51.01 | ▇▅▁▁▁ |
| longitude | 1 | 1 | -79.62 | 6.99 | -94.57 | -80.75 | -79.63 | -79.17 | 79.63 | ▇▁▁▁▁ |
| total_cc | 0 | 1 | 27.32 | 19.14 | 0.00 | 14.00 | 23.00 | 35.50 | 153.00 | ▇▃▁▁▁ |
| X5y_cc | 0 | 1 | 13.14 | 9.91 | 0.00 | 7.00 | 10.00 | 17.00 | 63.00 | ▇▅▁▁▁ |
| X2y_cc | 0 | 1 | 7.17 | 4.66 | 0.00 | 4.00 | 6.00 | 9.00 | 34.00 | ▇▆▁▁▁ |
| total_ccw | 0 | 1 | 34.20 | 22.68 | 2.00 | 18.00 | 28.00 | 44.00 | 165.00 | ▇▃▁▁▁ |
| X5y_ccw | 0 | 1 | 17.54 | 12.63 | 1.00 | 9.00 | 15.00 | 22.00 | 93.00 | ▇▃▁▁▁ |
| X2y_ccw | 0 | 1 | 9.02 | 6.00 | 0.00 | 5.00 | 8.00 | 12.00 | 45.00 | ▇▃▁▁▁ |
| total_noncomplaints | 0 | 1 | 22.98 | 12.09 | 5.00 | 15.00 | 20.00 | 28.00 | 90.00 | ▇▃▁▁▁ |
| X5y_noncomplaints | 0 | 1 | 13.23 | 7.11 | 4.00 | 9.00 | 11.00 | 15.00 | 68.00 | ▇▂▁▁▁ |
| X2y_noncomplaints | 0 | 1 | 6.00 | 3.64 | 0.00 | 4.00 | 5.00 | 8.00 | 40.00 | ▇▂▁▁▁ |
# Named groups of column names used to select variables for analysis
keep <- c("outbreak", "home_type", "number_beds")
profile <- c(
  "short_stay", "residents_council", "family_council", "accreditation"
)
location <- c(
  "address", "LHIN", "region", "city", "postal_code",
  "CSDname", "CSDuid", "latitude", "longitude"
)
# NOTE(review): restraints_percent exists in df but is not listed here;
# confirm the omission is intentional
quality <- paste0(
  c("antipsychotic", "depression", "falls", "pressure_ulcers", "pain"),
  "_percent"
)
# Each count group has three columns, prefixed total_, X5y_ and X2y_
all_inspections <- paste0(c("total", "X5y", "X2y"), "_inspections")
complaints <- paste0(c("total", "X5y", "X2y"), "_complaints")
critical <- paste0(c("total", "X5y", "X2y"), "_critical")
noncomplaints <- paste0(c("total", "X5y", "X2y"), "_noncomplaints")
withOrders <- paste0(c("total", "X5y", "X2y"), "_withOrders")
cc_ccw <- c(
  paste0(c("total", "X5y", "X2y"), "_cc"),
  paste0(c("total", "X5y", "X2y"), "_ccw")
)
covid <- c(
  "confirmed_resident_cases", "resident_deaths", "confirmed_staff_cases"
)
# Assemble the modelling data set from the selected column groups.
# Plain `[` indexing is used instead of subset(): subset() evaluates `select`
# with df's column names in scope, so a column sharing a name with one of
# these group variables would silently be picked up instead of the group.
data <- df[c(
  keep, profile, "region", quality, all_inspections, complaints,
  critical, noncomplaints, withOrders, cc_ccw
)]
# Print the skewness of each numeric predictor and draw its normal Q-Q plot
library(e1071)
listofcols <- c(
  "number_beds", quality, all_inspections, complaints,
  noncomplaints, critical, withOrders, cc_ccw
)
for (col_name in listofcols) {
  values <- data[[col_name]]
  print(col_name)
  print(skewness(values))
  qqnorm(values, main = col_name)
  qqline(values, col = "red")
}
## [1] "number_beds"
## [1] 1.510832
## [1] "antipsychotic_percent"
## [1] 0.7192037
## [1] "depression_percent"
## [1] 0.02250263
## [1] "falls_percent"
## [1] 0.1987631
## [1] "pressure_ulcers_percent"
## [1] 0.9435391
## [1] "pain_percent"
## [1] 2.534039
## [1] "total_inspections"
## [1] 1.563433
## [1] "X5y_inspections"
## [1] 1.991566
## [1] "X2y_inspections"
## [1] 1.947724
## [1] "total_complaints"
## [1] 1.716117
## [1] "X5y_complaints"
## [1] 1.927852
## [1] "X2y_complaints"
## [1] 1.984473
## [1] "total_noncomplaints"
## [1] 1.729386
## [1] "X5y_noncomplaints"
## [1] 2.374406
## [1] "X2y_noncomplaints"
## [1] 2.632259
## [1] "total_critical"
## [1] 1.845144
## [1] "X5y_critical"
## [1] 2.089656
## [1] "X2y_critical"
## [1] 2.263202
## [1] "total_withOrders"
## [1] 1.988978
## [1] "X5y_withOrders"
## [1] 2.57025
## [1] "X2y_withOrders"
## [1] 2.199904
## [1] "total_cc"
## [1] 1.66822
## [1] "X5y_cc"
## [1] 1.852097
## [1] "X2y_cc"
## [1] 1.747409
## [1] "total_ccw"
## [1] 1.457927
## [1] "X5y_ccw"
## [1] 1.94936
## [1] "X2y_ccw"
## [1] 1.847578
The output above shows that the inspection counts and number_beds are highly right-skewed, with skewness values > 1. The quality indicators are less right-skewed, with skewness values between 0 and 1, except for pain_percent, which has a skewness value of about 2.5.
The highly skewed variables will have to be transformed for the analysis. Because a log transformation is undefined at zero, we first check which variables contain zero values.
# Print summary() for each predictor so that zero minimums can be spotted.
# Reads from `data`, the same frame the skewness check and the later
# transformations operate on, rather than from `df`.
for (each in listofcols) {
  print(each)
  print(summary(data[[each]]))
}
## [1] "number_beds"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 12.0 69.0 120.0 127.3 160.0 543.0
## [1] "antipsychotic_percent"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 13.45 18.00 18.47 23.00 62.80
## [1] "depression_percent"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.50 15.25 23.40 23.40 31.40 48.40
## [1] "falls_percent"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.80 13.35 16.40 16.49 19.70 33.20
## [1] "pressure_ulcers_percent"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 1.60 2.60 2.74 3.60 11.00
## [1] "pain_percent"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.950 3.900 5.666 7.500 48.000
## [1] "total_inspections"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 9.00 22.00 32.00 37.68 47.00 172.00
## [1] "X5y_inspections"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 12.00 17.00 19.82 24.00 86.00
## [1] "X2y_inspections"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 5.000 8.000 9.195 12.000 44.000
## [1] "total_complaints"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0 7.0 12.0 14.7 20.0 94.0
## [1] "X5y_complaints"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.000 5.000 6.595 9.000 46.000
## [1] "X2y_complaints"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 1.000 3.000 3.195 5.000 20.000
## [1] "total_noncomplaints"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.00 15.00 20.00 22.98 28.00 90.00
## [1] "X5y_noncomplaints"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.00 9.00 11.00 13.23 15.00 68.00
## [1] "X2y_noncomplaints"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0 4 5 6 8 40
## [1] "total_critical"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 6.50 11.00 12.61 16.00 62.00
## [1] "X5y_critical"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 3.000 5.000 6.543 8.000 40.000
## [1] "X2y_critical"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.000 3.000 3.976 5.000 26.000
## [1] "total_withOrders"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 3.00 5.00 6.88 9.00 43.00
## [1] "X5y_withOrders"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.000 3.000 4.407 6.000 34.000
## [1] "X2y_withOrders"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 0.00 1.00 1.85 3.00 16.00
## [1] "total_cc"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 14.00 23.00 27.32 35.50 153.00
## [1] "X5y_cc"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 7.00 10.00 13.14 17.00 63.00
## [1] "X2y_cc"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 4.000 6.000 7.171 9.000 34.000
## [1] "total_ccw"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.0 18.0 28.0 34.2 44.0 165.0
## [1] "X5y_ccw"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 9.00 15.00 17.54 22.00 93.00
## [1] "X2y_ccw"
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 5.000 8.000 9.021 12.000 45.000
We see above that the following variables contain zeros:

- antipsychotic_percent
- pressure_ulcers_percent
- pain_percent
- complaints group
- critical group
- withOrders group
- cc group (total_cc, X5y_cc, X2y_cc)
- X2y_noncomplaints
- X2y_ccw
# Numeric variables that contain 0 values
list0s <- c(
  "antipsychotic_percent", "pressure_ulcers_percent", "pain_percent",
  complaints,
  "X2y_noncomplaints",
  critical,
  withOrders,
  "total_cc", "X5y_cc", "X2y_cc",
  "X2y_ccw"
)
library(rcompanion)
# Keep only the variables without zeros in listofcols
listofcols <- listofcols[!(listofcols %in% list0s)]
# For each variable without zeros, compare histograms of the raw,
# square-root-transformed and log-transformed values
for (col_name in listofcols) {
  values <- data[[col_name]]
  plotNormalHistogram(x = values, main = col_name)
  plotNormalHistogram(x = sqrt(values), main = c(col_name, "sqrt trans"))
  plotNormalHistogram(x = log(values), main = c(col_name, "log trans"))
}
As shown above, a square root transformation seems best for the following data:

- number_beds

A log transformation seems better for the following data:

- all_inspections group
- total_noncomplaints
- X5y_noncomplaints
- total_ccw
- X5y_ccw
# For each variable containing zeros, compare histograms of the raw and
# square-root-transformed values (log is skipped because log(0) is -Inf)
for (col_name in list0s) {
  values <- data[[col_name]]
  plotNormalHistogram(x = values, main = col_name)
  plotNormalHistogram(x = sqrt(values), main = c(col_name, "sqrt trans"))
}
As shown above, in all cases where variables contain 0s, a square root transformation seems to improve the distribution.
# Overwrite the chosen columns of `data` with their natural logs
log_cols <- c(
  all_inspections,
  "total_noncomplaints", "X5y_noncomplaints",
  "total_ccw", "X5y_ccw"
)
data[log_cols] <- lapply(data[log_cols], log)
# Overwrite number_beds and every zero-containing variable with its square root
sqrt_cols <- c("number_beds", list0s)
data[sqrt_cols] <- lapply(data[sqrt_cols], sqrt)
skim(data)
| Name | data |
| Number of rows | 615 |
| Number of columns | 34 |
| _______________________ | |
| Column type frequency: | |
| factor | 7 |
| numeric | 27 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| outbreak | 0 | 1 | FALSE | 2 | no: 323, yes: 292 |
| home_type | 0 | 1 | FALSE | 3 | For: 351, Non: 164, Mun: 100 |
| short_stay | 0 | 1 | FALSE | 2 | No: 412, Yes: 203 |
| residents_council | 0 | 1 | FALSE | 2 | Yes: 606, No: 9 |
| family_council | 0 | 1 | FALSE | 2 | Yes: 510, No: 105 |
| accreditation | 0 | 1 | FALSE | 2 | Yes: 511, No: 104 |
| region | 0 | 1 | FALSE | 5 | Wes: 233, Eas: 161, Cen: 122, Nor: 63 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| number_beds | 0 | 1 | 10.84 | 3.13 | 3.46 | 8.31 | 10.95 | 12.65 | 23.30 | ▂▇▅▁▁ |
| antipsychotic_percent | 0 | 1 | 4.19 | 0.94 | 0.00 | 3.67 | 4.24 | 4.80 | 7.92 | ▁▂▇▃▁ |
| depression_percent | 0 | 1 | 23.40 | 10.09 | 1.50 | 15.25 | 23.40 | 31.40 | 48.40 | ▃▇▇▇▂ |
| falls_percent | 0 | 1 | 16.49 | 4.83 | 1.80 | 13.35 | 16.40 | 19.70 | 33.20 | ▁▅▇▂▁ |
| pressure_ulcers_percent | 0 | 1 | 1.58 | 0.50 | 0.00 | 1.26 | 1.61 | 1.90 | 3.32 | ▁▃▇▃▁ |
| pain_percent | 0 | 1 | 2.13 | 1.07 | 0.00 | 1.40 | 1.97 | 2.74 | 6.93 | ▃▇▃▁▁ |
| total_inspections | 0 | 1 | 3.48 | 0.54 | 2.20 | 3.09 | 3.47 | 3.85 | 5.15 | ▂▇▇▃▁ |
| X5y_inspections | 0 | 1 | 2.84 | 0.52 | 1.61 | 2.48 | 2.83 | 3.18 | 4.45 | ▂▇▇▃▁ |
| X2y_inspections | 0 | 1 | 2.06 | 0.56 | 0.00 | 1.61 | 2.08 | 2.48 | 3.78 | ▁▂▇▆▁ |
| total_complaints | 0 | 1 | 3.55 | 1.44 | 0.00 | 2.65 | 3.46 | 4.47 | 9.70 | ▂▇▅▁▁ |
| X5y_complaints | 0 | 1 | 2.31 | 1.12 | 0.00 | 1.41 | 2.24 | 3.00 | 6.78 | ▂▇▃▁▁ |
| X2y_complaints | 0 | 1 | 1.57 | 0.86 | 0.00 | 1.00 | 1.73 | 2.24 | 4.47 | ▂▇▅▁▁ |
| total_critical | 0 | 1 | 3.35 | 1.19 | 0.00 | 2.55 | 3.32 | 4.00 | 7.87 | ▁▇▇▂▁ |
| X5y_critical | 0 | 1 | 2.41 | 0.85 | 0.00 | 1.73 | 2.24 | 2.83 | 6.32 | ▁▇▅▁▁ |
| X2y_critical | 0 | 1 | 1.90 | 0.61 | 0.00 | 1.41 | 1.73 | 2.24 | 5.10 | ▂▇▃▁▁ |
| total_noncomplaints | 0 | 1 | 3.02 | 0.47 | 1.61 | 2.71 | 3.00 | 3.33 | 4.50 | ▁▅▇▃▁ |
| X5y_noncomplaints | 0 | 1 | 2.47 | 0.45 | 1.39 | 2.20 | 2.40 | 2.71 | 4.22 | ▂▇▆▂▁ |
| X2y_noncomplaints | 0 | 1 | 2.36 | 0.67 | 0.00 | 2.00 | 2.24 | 2.83 | 6.32 | ▁▇▅▁▁ |
| total_withOrders | 0 | 1 | 2.37 | 1.12 | 0.00 | 1.73 | 2.24 | 3.00 | 6.56 | ▂▇▅▁▁ |
| X5y_withOrders | 0 | 1 | 1.84 | 1.00 | 0.00 | 1.41 | 1.73 | 2.45 | 5.83 | ▅▇▅▁▁ |
| X2y_withOrders | 0 | 1 | 1.06 | 0.85 | 0.00 | 0.00 | 1.00 | 1.73 | 4.00 | ▆▇▃▁▁ |
| total_cc | 0 | 1 | 4.94 | 1.70 | 0.00 | 3.74 | 4.80 | 5.96 | 12.37 | ▁▇▆▁▁ |
| X5y_cc | 0 | 1 | 3.41 | 1.24 | 0.00 | 2.65 | 3.16 | 4.12 | 7.94 | ▁▇▆▂▁ |
| X2y_cc | 0 | 1 | 2.55 | 0.82 | 0.00 | 2.00 | 2.45 | 3.00 | 5.83 | ▁▇▇▂▁ |
| total_ccw | 0 | 1 | 3.32 | 0.67 | 0.69 | 2.89 | 3.33 | 3.78 | 5.11 | ▁▂▇▇▂ |
| X5y_ccw | 0 | 1 | 2.64 | 0.69 | 0.00 | 2.20 | 2.71 | 3.09 | 4.53 | ▁▂▇▇▂ |
| X2y_ccw | 0 | 1 | 2.86 | 0.93 | 0.00 | 2.24 | 2.83 | 3.46 | 6.71 | ▁▇▇▂▁ |
# Cross-tabulate outbreak status against home type
xtabs(formula = ~ outbreak + home_type, data = df)
## home_type
## outbreak For-Profit Municipal Non-Profit
## no 184 56 83
## yes 167 44 81
# Cross-tabulate accreditation against home type
xtabs(formula = ~ accreditation + home_type, data = df)
## home_type
## accreditation For-Profit Municipal Non-Profit
## No 25 29 50
## Yes 326 71 114
# Cross-tabulate short-stay availability against home type
xtabs(formula = ~ short_stay + home_type, data = df)
## home_type
## short_stay For-Profit Municipal Non-Profit
## No 248 53 111
## Yes 103 47 53
# Cross-tabulate family council presence against home type
xtabs(formula = ~ family_council + home_type, data = df)
## home_type
## family_council For-Profit Municipal Non-Profit
## No 65 10 30
## Yes 286 90 134
# Cross-tabulate residents' council presence against home type
xtabs(formula = ~ residents_council + home_type, data = df)
## home_type
## residents_council For-Profit Municipal Non-Profit
## No 5 0 4
## Yes 346 100 160
# Cross-tabulate residents' council presence against outbreak status
xtabs(formula = ~ residents_council + outbreak, data = df)
## outbreak
## residents_council no yes
## No 3 6
## Yes 320 286
# Cross-tabulate accreditation against outbreak status
xtabs(formula = ~ accreditation + outbreak, data = df)
## outbreak
## accreditation no yes
## No 62 42
## Yes 261 250
# Cross-tabulate short-stay availability against outbreak status
xtabs(formula = ~ short_stay + outbreak, data = df)
## outbreak
## short_stay no yes
## No 219 193
## Yes 104 99
# Cross-tabulate family council presence against outbreak status
xtabs(formula = ~ family_council + outbreak, data = df)
## outbreak
## family_council no yes
## No 68 37
## Yes 255 255
# Cross-tabulate family council presence against residents' council presence
xtabs(formula = ~ family_council + residents_council, data = df)
## residents_council
## family_council No Yes
## No 5 100
## Yes 4 506
# Logistic regression of outbreak status on every other column of `data`
# (inspection counts enter in their transformed form)
fit <- glm(formula = outbreak ~ ., family = binomial, data = data)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ ., family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2487 -0.9295 -0.4374 0.9504 2.1831
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.143193 2.840671 1.106 0.2685
## home_typeMunicipal -0.533617 0.294532 -1.812 0.0700 .
## home_typeNon-Profit 0.048595 0.251397 0.193 0.8467
## number_beds 0.232655 0.050627 4.595 4.32e-06 ***
## short_stayYes -0.181057 0.210418 -0.860 0.3895
## residents_councilYes -0.521201 0.953494 -0.547 0.5846
## family_councilYes 0.157426 0.267084 0.589 0.5556
## accreditationYes -0.236254 0.278384 -0.849 0.3961
## regionEast -0.237564 0.324196 -0.733 0.4637
## regionNorth -0.946643 0.427283 -2.215 0.0267 *
## regionToronto 0.689524 0.484719 1.423 0.1549
## regionWest -0.511712 0.290713 -1.760 0.0784 .
## antipsychotic_percent 0.124144 0.105996 1.171 0.2415
## depression_percent 0.005139 0.010747 0.478 0.6325
## falls_percent -0.012270 0.021731 -0.565 0.5723
## pressure_ulcers_percent -0.012357 0.228484 -0.054 0.9569
## pain_percent -0.128315 0.098233 -1.306 0.1915
## total_inspections -7.622314 4.436957 -1.718 0.0858 .
## X5y_inspections 5.059650 3.793593 1.334 0.1823
## X2y_inspections -1.082597 1.207296 -0.897 0.3699
## total_complaints 0.862600 0.978836 0.881 0.3782
## X5y_complaints 0.087526 0.839495 0.104 0.9170
## X2y_complaints -0.267613 0.604419 -0.443 0.6579
## total_critical -0.544059 1.194274 -0.456 0.6487
## X5y_critical 1.538881 1.340180 1.148 0.2509
## X2y_critical -1.358774 0.965301 -1.408 0.1592
## total_noncomplaints 3.844665 2.718140 1.414 0.1572
## X5y_noncomplaints -4.774187 2.600318 -1.836 0.0664 .
## X2y_noncomplaints 1.090983 0.776042 1.406 0.1598
## total_withOrders -0.262043 0.346652 -0.756 0.4497
## X5y_withOrders 0.756159 0.394837 1.915 0.0555 .
## X2y_withOrders 0.135237 0.422691 0.320 0.7490
## total_cc 0.717939 1.517787 0.473 0.6362
## X5y_cc -1.305044 1.571094 -0.831 0.4062
## X2y_cc 2.055378 1.531700 1.342 0.1796
## total_ccw 1.548906 1.813860 0.854 0.3931
## X5y_ccw -0.942757 1.309266 -0.720 0.4715
## X2y_ccw -1.033641 1.096100 -0.943 0.3457
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 695.67 on 577 degrees of freedom
## AIC: 771.67
##
## Number of Fisher Scoring iterations: 4
# Refit without pressure_ulcers_percent (p = 0.9569 in the previous fit)
fit <- update(fit, formula. = . ~ . - pressure_ulcers_percent)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay +
## residents_council + family_council + accreditation + region +
## antipsychotic_percent + depression_percent + falls_percent +
## pain_percent + total_inspections + X5y_inspections + X2y_inspections +
## total_complaints + X5y_complaints + X2y_complaints + total_critical +
## X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X2y_noncomplaints + total_withOrders + X5y_withOrders + X2y_withOrders +
## total_cc + X5y_cc + X2y_cc + total_ccw + X5y_ccw + X2y_ccw,
## family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2486 -0.9255 -0.4352 0.9507 2.1813
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.13019 2.83094 1.106 0.2689
## home_typeMunicipal -0.53409 0.29440 -1.814 0.0696 .
## home_typeNon-Profit 0.04886 0.25133 0.194 0.8458
## number_beds 0.23273 0.05061 4.598 4.26e-06 ***
## short_stayYes -0.18205 0.20962 -0.868 0.3851
## residents_councilYes -0.52076 0.95282 -0.547 0.5847
## family_councilYes 0.15860 0.26620 0.596 0.5513
## accreditationYes -0.23458 0.27665 -0.848 0.3965
## regionEast -0.23736 0.32417 -0.732 0.4640
## regionNorth -0.94476 0.42591 -2.218 0.0265 *
## regionToronto 0.68882 0.48450 1.422 0.1551
## regionWest -0.51201 0.29065 -1.762 0.0781 .
## antipsychotic_percent 0.12318 0.10450 1.179 0.2385
## depression_percent 0.00499 0.01039 0.480 0.6309
## falls_percent -0.01241 0.02158 -0.575 0.5653
## pain_percent -0.12866 0.09802 -1.313 0.1893
## total_inspections -7.61806 4.43813 -1.717 0.0861 .
## X5y_inspections 5.05227 3.79136 1.333 0.1827
## X2y_inspections -1.08154 1.20721 -0.896 0.3703
## total_complaints 0.86025 0.97812 0.879 0.3791
## X5y_complaints 0.08803 0.83947 0.105 0.9165
## X2y_complaints -0.26833 0.60424 -0.444 0.6570
## total_critical -0.54448 1.19461 -0.456 0.6485
## X5y_critical 1.53645 1.33935 1.147 0.2513
## X2y_critical -1.35840 0.96522 -1.407 0.1593
## total_noncomplaints 3.84332 2.71878 1.414 0.1575
## X5y_noncomplaints -4.77013 2.59908 -1.835 0.0665 .
## X2y_noncomplaints 1.08974 0.77560 1.405 0.1600
## total_withOrders -0.26317 0.34604 -0.761 0.4469
## X5y_withOrders 0.75559 0.39471 1.914 0.0556 .
## X2y_withOrders 0.13512 0.42277 0.320 0.7493
## total_cc 0.71902 1.51816 0.474 0.6358
## X5y_cc -1.30328 1.57069 -0.830 0.4067
## X2y_cc 2.05514 1.53186 1.342 0.1797
## total_ccw 1.55105 1.81438 0.855 0.3926
## X5y_ccw -0.94317 1.30954 -0.720 0.4714
## X2y_ccw -1.03245 1.09623 -0.942 0.3463
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 695.68 on 578 degrees of freedom
## AIC: 769.68
##
## Number of Fisher Scoring iterations: 4
# Refit without X5y_complaints (p = 0.9165 in the previous fit)
fit <- update(fit, formula. = . ~ . - X5y_complaints)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay +
## residents_council + family_council + accreditation + region +
## antipsychotic_percent + depression_percent + falls_percent +
## pain_percent + total_inspections + X5y_inspections + X2y_inspections +
## total_complaints + X2y_complaints + total_critical + X5y_critical +
## X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X2y_noncomplaints + total_withOrders + X5y_withOrders + X2y_withOrders +
## total_cc + X5y_cc + X2y_cc + total_ccw + X5y_ccw + X2y_ccw,
## family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2466 -0.9274 -0.4355 0.9514 2.1806
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.092761 2.807636 1.102 0.2707
## home_typeMunicipal -0.534879 0.294283 -1.818 0.0691 .
## home_typeNon-Profit 0.048687 0.251311 0.194 0.8464
## number_beds 0.232734 0.050602 4.599 4.24e-06 ***
## short_stayYes -0.180455 0.209066 -0.863 0.3881
## residents_councilYes -0.521754 0.953067 -0.547 0.5841
## family_councilYes 0.159551 0.266011 0.600 0.5486
## accreditationYes -0.234674 0.276638 -0.848 0.3963
## regionEast -0.236248 0.324019 -0.729 0.4659
## regionNorth -0.944141 0.425852 -2.217 0.0266 *
## regionToronto 0.686985 0.483802 1.420 0.1556
## regionWest -0.509040 0.289270 -1.760 0.0785 .
## antipsychotic_percent 0.123557 0.104466 1.183 0.2369
## depression_percent 0.004998 0.010382 0.481 0.6302
## falls_percent -0.012512 0.021555 -0.580 0.5616
## pain_percent -0.129577 0.097639 -1.327 0.1845
## total_inspections -7.635450 4.434500 -1.722 0.0851 .
## X5y_inspections 5.094915 3.765898 1.353 0.1761
## X2y_inspections -1.105348 1.185249 -0.933 0.3510
## total_complaints 0.889025 0.937585 0.948 0.3430
## X2y_complaints -0.232821 0.500932 -0.465 0.6421
## total_critical -0.520950 1.173006 -0.444 0.6570
## X5y_critical 1.458860 1.114064 1.309 0.1904
## X2y_critical -1.326826 0.917013 -1.447 0.1479
## total_noncomplaints 3.856397 2.715546 1.420 0.1556
## X5y_noncomplaints -4.808255 2.570222 -1.871 0.0614 .
## X2y_noncomplaints 1.104618 0.762395 1.449 0.1474
## total_withOrders -0.265594 0.345295 -0.769 0.4418
## X5y_withOrders 0.755039 0.394477 1.914 0.0556 .
## X2y_withOrders 0.133755 0.422690 0.316 0.7517
## total_cc 0.676429 1.462093 0.463 0.6436
## X5y_cc -1.191395 1.151383 -1.035 0.3008
## X2y_cc 2.012299 1.476688 1.363 0.1730
## total_ccw 1.573021 1.802839 0.873 0.3829
## X5y_ccw -0.936432 1.307462 -0.716 0.4739
## X2y_ccw -1.033976 1.096578 -0.943 0.3457
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 695.69 on 579 degrees of freedom
## AIC: 767.69
##
## Number of Fisher Scoring iterations: 4
# Refit without total_critical (p = 0.6570 in the previous fit)
# NOTE(review): X2y_withOrders had a higher p-value (0.7517) in that fit;
# confirm the elimination order is intentional
fit <- update(fit, formula. = . ~ . - total_critical)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay +
## residents_council + family_council + accreditation + region +
## antipsychotic_percent + depression_percent + falls_percent +
## pain_percent + total_inspections + X5y_inspections + X2y_inspections +
## total_complaints + X2y_complaints + X5y_critical + X2y_critical +
## total_noncomplaints + X5y_noncomplaints + X2y_noncomplaints +
## total_withOrders + X5y_withOrders + X2y_withOrders + total_cc +
## X5y_cc + X2y_cc + total_ccw + X5y_ccw + X2y_ccw, family = binomial,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2667 -0.9303 -0.4311 0.9567 2.2094
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.878176 2.775590 1.037 0.2998
## home_typeMunicipal -0.531639 0.294213 -1.807 0.0708 .
## home_typeNon-Profit 0.055481 0.250986 0.221 0.8251
## number_beds 0.229874 0.050170 4.582 4.61e-06 ***
## short_stayYes -0.175316 0.208647 -0.840 0.4008
## residents_councilYes -0.540071 0.958702 -0.563 0.5732
## family_councilYes 0.156236 0.265851 0.588 0.5567
## accreditationYes -0.237742 0.276805 -0.859 0.3904
## regionEast -0.230775 0.323602 -0.713 0.4758
## regionNorth -0.932191 0.424930 -2.194 0.0283 *
## regionToronto 0.676336 0.482319 1.402 0.1608
## regionWest -0.499104 0.288146 -1.732 0.0833 .
## antipsychotic_percent 0.123328 0.104553 1.180 0.2382
## depression_percent 0.005069 0.010366 0.489 0.6248
## falls_percent -0.012717 0.021541 -0.590 0.5550
## pain_percent -0.127711 0.097558 -1.309 0.1905
## total_inspections -6.462434 3.554181 -1.818 0.0690 .
## X5y_inspections 4.472021 3.479202 1.285 0.1987
## X2y_inspections -1.085330 1.182591 -0.918 0.3587
## total_complaints 1.171973 0.712002 1.646 0.0998 .
## X2y_complaints -0.266187 0.495383 -0.537 0.5910
## X5y_critical 1.247180 1.001368 1.245 0.2130
## X2y_critical -1.337686 0.917922 -1.457 0.1450
## total_noncomplaints 3.060035 2.036657 1.502 0.1330
## X5y_noncomplaints -4.329196 2.319221 -1.867 0.0619 .
## X2y_noncomplaints 1.091251 0.761249 1.434 0.1517
## total_withOrders -0.202373 0.313578 -0.645 0.5187
## X5y_withOrders 0.707450 0.378232 1.870 0.0614 .
## X2y_withOrders 0.131363 0.424618 0.309 0.7570
## total_cc 0.081704 0.603343 0.135 0.8923
## X5y_cc -1.025557 1.082918 -0.947 0.3436
## X2y_cc 2.029400 1.479307 1.372 0.1701
## total_ccw 1.216711 1.605697 0.758 0.4486
## X5y_ccw -0.777883 1.255114 -0.620 0.5354
## X2y_ccw -1.026584 1.101423 -0.932 0.3513
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 695.88 on 580 degrees of freedom
## AIC: 765.88
##
## Number of Fisher Scoring iterations: 4
# Backward elimination step: total_cc has the largest Wald p-value in the
# preceding summary (0.8923), so refit the model without it.
fit <- update(fit, . ~ . - total_cc)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay +
## residents_council + family_council + accreditation + region +
## antipsychotic_percent + depression_percent + falls_percent +
## pain_percent + total_inspections + X5y_inspections + X2y_inspections +
## total_complaints + X2y_complaints + X5y_critical + X2y_critical +
## total_noncomplaints + X5y_noncomplaints + X2y_noncomplaints +
## total_withOrders + X5y_withOrders + X2y_withOrders + X5y_cc +
## X2y_cc + total_ccw + X5y_ccw + X2y_ccw, family = binomial,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2603 -0.9337 -0.4337 0.9592 2.2207
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.804341 2.723947 1.030 0.3032
## home_typeMunicipal -0.529274 0.293704 -1.802 0.0715 .
## home_typeNon-Profit 0.056724 0.250802 0.226 0.8211
## number_beds 0.230246 0.050091 4.597 4.29e-06 ***
## short_stayYes -0.175140 0.208618 -0.840 0.4012
## residents_councilYes -0.547391 0.958233 -0.571 0.5678
## family_councilYes 0.154790 0.265644 0.583 0.5601
## accreditationYes -0.236160 0.276599 -0.854 0.3932
## regionEast -0.234281 0.322581 -0.726 0.4677
## regionNorth -0.932893 0.425019 -2.195 0.0282 *
## regionToronto 0.679489 0.481844 1.410 0.1585
## regionWest -0.499007 0.288139 -1.732 0.0833 .
## antipsychotic_percent 0.123678 0.104529 1.183 0.2367
## depression_percent 0.004994 0.010350 0.483 0.6294
## falls_percent -0.012711 0.021543 -0.590 0.5552
## pain_percent -0.127622 0.097544 -1.308 0.1908
## total_inspections -6.594359 3.425477 -1.925 0.0542 .
## X5y_inspections 4.487615 3.481494 1.289 0.1974
## X2y_inspections -1.087349 1.182821 -0.919 0.3579
## total_complaints 1.233039 0.552954 2.230 0.0258 *
## X2y_complaints -0.274842 0.491417 -0.559 0.5760
## X5y_critical 1.270735 0.987488 1.287 0.1982
## X2y_critical -1.345058 0.916660 -1.467 0.1423
## total_noncomplaints 3.210968 1.707720 1.880 0.0601 .
## X5y_noncomplaints -4.368816 2.303622 -1.896 0.0579 .
## X2y_noncomplaints 1.085098 0.759714 1.428 0.1532
## total_withOrders -0.230660 0.233867 -0.986 0.3240
## X5y_withOrders 0.724211 0.357667 2.025 0.0429 *
## X2y_withOrders 0.130018 0.424585 0.306 0.7594
## X5y_cc -1.007757 1.077053 -0.936 0.3494
## X2y_cc 2.034773 1.479350 1.375 0.1690
## total_ccw 1.329467 1.375444 0.967 0.3338
## X5y_ccw -0.827639 1.200815 -0.689 0.4907
## X2y_ccw -1.016332 1.098684 -0.925 0.3549
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 695.90 on 581 degrees of freedom
## AIC: 763.9
##
## Number of Fisher Scoring iterations: 4
# Backward elimination step: drop X2y_withOrders (p = 0.7594 in the preceding
# summary). Only the home_typeNon-Profit level has a larger p-value, and
# home_type is kept as a whole factor.
fit <- update(fit, . ~ . - X2y_withOrders)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay +
## residents_council + family_council + accreditation + region +
## antipsychotic_percent + depression_percent + falls_percent +
## pain_percent + total_inspections + X5y_inspections + X2y_inspections +
## total_complaints + X2y_complaints + X5y_critical + X2y_critical +
## total_noncomplaints + X5y_noncomplaints + X2y_noncomplaints +
## total_withOrders + X5y_withOrders + X5y_cc + X2y_cc + total_ccw +
## X5y_ccw + X2y_ccw, family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2564 -0.9327 -0.4296 0.9591 2.2380
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.700383 2.695570 1.002 0.3164
## home_typeMunicipal -0.533489 0.293520 -1.818 0.0691 .
## home_typeNon-Profit 0.060137 0.250297 0.240 0.8101
## number_beds 0.232431 0.049612 4.685 2.8e-06 ***
## short_stayYes -0.174508 0.208632 -0.836 0.4029
## residents_councilYes -0.545697 0.951764 -0.573 0.5664
## family_councilYes 0.153696 0.265543 0.579 0.5627
## accreditationYes -0.234102 0.276300 -0.847 0.3968
## regionEast -0.227700 0.321645 -0.708 0.4790
## regionNorth -0.933344 0.425219 -2.195 0.0282 *
## regionToronto 0.681302 0.481733 1.414 0.1573
## regionWest -0.499361 0.288159 -1.733 0.0831 .
## antipsychotic_percent 0.124103 0.104345 1.189 0.2343
## depression_percent 0.005011 0.010349 0.484 0.6282
## falls_percent -0.012767 0.021551 -0.592 0.5536
## pain_percent -0.124983 0.097147 -1.287 0.1983
## total_inspections -6.473664 3.396887 -1.906 0.0567 .
## X5y_inspections 4.564285 3.476293 1.313 0.1892
## X2y_inspections -1.073210 1.182755 -0.907 0.3642
## total_complaints 1.218242 0.549669 2.216 0.0267 *
## X2y_complaints -0.268742 0.491294 -0.547 0.5844
## X5y_critical 1.296286 0.985445 1.315 0.1884
## X2y_critical -1.316457 0.911011 -1.445 0.1484
## total_noncomplaints 3.148178 1.692015 1.861 0.0628 .
## X5y_noncomplaints -4.477747 2.279802 -1.964 0.0495 *
## X2y_noncomplaints 1.090560 0.759593 1.436 0.1511
## total_withOrders -0.228506 0.233684 -0.978 0.3282
## X5y_withOrders 0.756031 0.342283 2.209 0.0272 *
## X5y_cc -1.017847 1.079033 -0.943 0.3455
## X2y_cc 1.755008 1.161371 1.511 0.1307
## total_ccw 1.297022 1.371323 0.946 0.3442
## X5y_ccw -0.864496 1.194333 -0.724 0.4692
## X2y_ccw -0.735698 0.602462 -1.221 0.2220
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 696.00 on 582 degrees of freedom
## AIC: 762
##
## Number of Fisher Scoring iterations: 4
# Backward elimination step: drop depression_percent (p = 0.6282 in the
# preceding summary) and refit.
fit <- update(fit, . ~ . - depression_percent)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay +
## residents_council + family_council + accreditation + region +
## antipsychotic_percent + falls_percent + pain_percent + total_inspections +
## X5y_inspections + X2y_inspections + total_complaints + X2y_complaints +
## X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X2y_noncomplaints + total_withOrders + X5y_withOrders + X5y_cc +
## X2y_cc + total_ccw + X5y_ccw + X2y_ccw, family = binomial,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2230 -0.9210 -0.4355 0.9517 2.2353
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.77184 2.69190 1.030 0.3032
## home_typeMunicipal -0.50599 0.28777 -1.758 0.0787 .
## home_typeNon-Profit 0.07516 0.24835 0.303 0.7622
## number_beds 0.22927 0.04911 4.669 3.03e-06 ***
## short_stayYes -0.17413 0.20871 -0.834 0.4041
## residents_councilYes -0.55367 0.95543 -0.580 0.5623
## family_councilYes 0.15128 0.26538 0.570 0.5686
## accreditationYes -0.23268 0.27635 -0.842 0.3998
## regionEast -0.20989 0.31967 -0.657 0.5114
## regionNorth -0.92830 0.42417 -2.189 0.0286 *
## regionToronto 0.66815 0.48067 1.390 0.1645
## regionWest -0.47002 0.28156 -1.669 0.0950 .
## antipsychotic_percent 0.12558 0.10422 1.205 0.2282
## falls_percent -0.01082 0.02113 -0.512 0.6084
## pain_percent -0.12319 0.09704 -1.269 0.2043
## total_inspections -6.46720 3.39713 -1.904 0.0569 .
## X5y_inspections 4.60174 3.47142 1.326 0.1850
## X2y_inspections -1.05464 1.18263 -0.892 0.3725
## total_complaints 1.20207 0.54827 2.192 0.0283 *
## X2y_complaints -0.26575 0.49140 -0.541 0.5886
## X5y_critical 1.30739 0.98281 1.330 0.1834
## X2y_critical -1.29210 0.91040 -1.419 0.1558
## total_noncomplaints 3.12744 1.69193 1.848 0.0645 .
## X5y_noncomplaints -4.50857 2.27522 -1.982 0.0475 *
## X2y_noncomplaints 1.07269 0.75955 1.412 0.1579
## total_withOrders -0.22710 0.23346 -0.973 0.3307
## X5y_withOrders 0.75939 0.34198 2.221 0.0264 *
## X5y_cc -1.00717 1.07745 -0.935 0.3499
## X2y_cc 1.71929 1.15910 1.483 0.1380
## total_ccw 1.32952 1.36867 0.971 0.3314
## X5y_ccw -0.89570 1.19166 -0.752 0.4523
## X2y_ccw -0.72310 0.60110 -1.203 0.2290
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 696.23 on 583 degrees of freedom
## AIC: 760.23
##
## Number of Fisher Scoring iterations: 4
# Backward elimination step: drop falls_percent (p = 0.6084 in the preceding
# summary) and refit.
fit <- update(fit, . ~ . - falls_percent)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay +
## residents_council + family_council + accreditation + region +
## antipsychotic_percent + pain_percent + total_inspections +
## X5y_inspections + X2y_inspections + total_complaints + X2y_complaints +
## X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X2y_noncomplaints + total_withOrders + X5y_withOrders + X5y_cc +
## X2y_cc + total_ccw + X5y_ccw + X2y_ccw, family = binomial,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2380 -0.9292 -0.4446 0.9558 2.2335
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.68485 2.68461 1.000 0.3173
## home_typeMunicipal -0.51452 0.28715 -1.792 0.0732 .
## home_typeNon-Profit 0.08527 0.24738 0.345 0.7303
## number_beds 0.22905 0.04907 4.667 3.05e-06 ***
## short_stayYes -0.17441 0.20870 -0.836 0.4033
## residents_councilYes -0.55241 0.95739 -0.577 0.5639
## family_councilYes 0.14919 0.26545 0.562 0.5741
## accreditationYes -0.22594 0.27599 -0.819 0.4130
## regionEast -0.21033 0.31912 -0.659 0.5098
## regionNorth -0.91168 0.42321 -2.154 0.0312 *
## regionToronto 0.70790 0.47487 1.491 0.1360
## regionWest -0.47757 0.28104 -1.699 0.0893 .
## antipsychotic_percent 0.11939 0.10357 1.153 0.2490
## pain_percent -0.12982 0.09639 -1.347 0.1780
## total_inspections -6.51963 3.39254 -1.922 0.0546 .
## X5y_inspections 4.63844 3.45924 1.341 0.1800
## X2y_inspections -1.06372 1.18211 -0.900 0.3682
## total_complaints 1.20317 0.54832 2.194 0.0282 *
## X2y_complaints -0.26489 0.49151 -0.539 0.5899
## X5y_critical 1.31296 0.97840 1.342 0.1796
## X2y_critical -1.29520 0.91045 -1.423 0.1549
## total_noncomplaints 3.14885 1.69032 1.863 0.0625 .
## X5y_noncomplaints -4.55939 2.26329 -2.014 0.0440 *
## X2y_noncomplaints 1.07998 0.75848 1.424 0.1545
## total_withOrders -0.23397 0.23314 -1.004 0.3156
## X5y_withOrders 0.76615 0.34151 2.243 0.0249 *
## X5y_cc -1.00972 1.07417 -0.940 0.3472
## X2y_cc 1.72565 1.15882 1.489 0.1364
## total_ccw 1.36840 1.36440 1.003 0.3159
## X5y_ccw -0.90858 1.18950 -0.764 0.4450
## X2y_ccw -0.73282 0.60057 -1.220 0.2224
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 696.49 on 584 degrees of freedom
## AIC: 758.49
##
## Number of Fisher Scoring iterations: 4
# Backward elimination step: drop X2y_complaints (p = 0.5899 in the preceding
# summary) and refit.
fit <- update(fit, . ~ . - X2y_complaints)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay +
## residents_council + family_council + accreditation + region +
## antipsychotic_percent + pain_percent + total_inspections +
## X5y_inspections + X2y_inspections + total_complaints + X5y_critical +
## X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X2y_noncomplaints + total_withOrders + X5y_withOrders + X5y_cc +
## X2y_cc + total_ccw + X5y_ccw + X2y_ccw, family = binomial,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2408 -0.9284 -0.4264 0.9466 2.2079
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.70022 2.68973 1.004 0.3154
## home_typeMunicipal -0.51859 0.28681 -1.808 0.0706 .
## home_typeNon-Profit 0.09288 0.24706 0.376 0.7070
## number_beds 0.22716 0.04891 4.645 3.41e-06 ***
## short_stayYes -0.17183 0.20859 -0.824 0.4101
## residents_councilYes -0.55943 0.95964 -0.583 0.5599
## family_councilYes 0.15559 0.26536 0.586 0.5577
## accreditationYes -0.23562 0.27537 -0.856 0.3922
## regionEast -0.20664 0.31876 -0.648 0.5168
## regionNorth -0.92449 0.42277 -2.187 0.0288 *
## regionToronto 0.71427 0.47492 1.504 0.1326
## regionWest -0.48419 0.28064 -1.725 0.0845 .
## antipsychotic_percent 0.12110 0.10340 1.171 0.2415
## pain_percent -0.12996 0.09649 -1.347 0.1780
## total_inspections -6.36560 3.38369 -1.881 0.0599 .
## X5y_inspections 4.40330 3.44056 1.280 0.2006
## X2y_inspections -1.18816 1.15711 -1.027 0.3045
## total_complaints 1.16791 0.54524 2.142 0.0322 *
## X5y_critical 1.23688 0.97128 1.273 0.2029
## X2y_critical -1.00602 0.73034 -1.377 0.1684
## total_noncomplaints 3.08693 1.68784 1.829 0.0674 .
## X5y_noncomplaints -4.40094 2.24900 -1.957 0.0504 .
## X2y_noncomplaints 1.14661 0.74676 1.535 0.1247
## total_withOrders -0.23777 0.23268 -1.022 0.3068
## X5y_withOrders 0.76294 0.34117 2.236 0.0253 *
## X5y_cc -0.93633 1.06922 -0.876 0.3812
## X2y_cc 1.37304 0.95329 1.440 0.1498
## total_ccw 1.36559 1.36139 1.003 0.3158
## X5y_ccw -0.86630 1.18779 -0.729 0.4658
## X2y_ccw -0.75421 0.59871 -1.260 0.2078
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 696.78 on 585 degrees of freedom
## AIC: 756.78
##
## Number of Fisher Scoring iterations: 4
# Backward elimination step: drop residents_council (p = 0.5599 in the
# preceding summary) and refit.
fit <- update(fit, . ~ . - residents_council)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + short_stay +
## family_council + accreditation + region + antipsychotic_percent +
## pain_percent + total_inspections + X5y_inspections + X2y_inspections +
## total_complaints + X5y_critical + X2y_critical + total_noncomplaints +
## X5y_noncomplaints + X2y_noncomplaints + total_withOrders +
## X5y_withOrders + X5y_cc + X2y_cc + total_ccw + X5y_ccw +
## X2y_ccw, family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0221 -0.9313 -0.4281 0.9548 2.2070
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.34095 2.62083 0.893 0.3717
## home_typeMunicipal -0.52684 0.28638 -1.840 0.0658 .
## home_typeNon-Profit 0.09582 0.24662 0.389 0.6976
## number_beds 0.22857 0.04884 4.680 2.87e-06 ***
## short_stayYes -0.17836 0.20836 -0.856 0.3920
## family_councilYes 0.14207 0.26349 0.539 0.5898
## accreditationYes -0.25099 0.27382 -0.917 0.3593
## regionEast -0.20881 0.31851 -0.656 0.5121
## regionNorth -0.92023 0.42321 -2.174 0.0297 *
## regionToronto 0.70301 0.47449 1.482 0.1384
## regionWest -0.48112 0.28029 -1.717 0.0861 .
## antipsychotic_percent 0.12472 0.10348 1.205 0.2281
## pain_percent -0.13262 0.09636 -1.376 0.1688
## total_inspections -6.61085 3.36414 -1.965 0.0494 *
## X5y_inspections 4.58554 3.42675 1.338 0.1808
## X2y_inspections -1.12016 1.15367 -0.971 0.3316
## total_complaints 1.19999 0.54422 2.205 0.0275 *
## X5y_critical 1.29858 0.96688 1.343 0.1793
## X2y_critical -0.99339 0.73004 -1.361 0.1736
## total_noncomplaints 3.16244 1.68612 1.876 0.0607 .
## X5y_noncomplaints -4.51485 2.24214 -2.014 0.0440 *
## X2y_noncomplaints 1.11827 0.74592 1.499 0.1338
## total_withOrders -0.23648 0.23275 -1.016 0.3096
## X5y_withOrders 0.77367 0.34132 2.267 0.0234 *
## X5y_cc -0.96161 1.07026 -0.898 0.3689
## X2y_cc 1.30754 0.94794 1.379 0.1678
## total_ccw 1.43003 1.35655 1.054 0.2918
## X5y_ccw -0.94048 1.17967 -0.797 0.4253
## X2y_ccw -0.73721 0.59859 -1.232 0.2181
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 697.14 on 586 degrees of freedom
## AIC: 755.14
##
## Number of Fisher Scoring iterations: 4
# Backward elimination step: drop short_stay (p = 0.3920 in the preceding
# summary) and refit.
# NOTE(review): family_council (p = 0.5898) and X5y_ccw (p = 0.4253) have larger
# p-values in that summary but are retained here -- confirm this ordering is
# intentional.
fit <- update(fit, . ~ . - short_stay)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## accreditation + region + antipsychotic_percent + pain_percent +
## total_inspections + X5y_inspections + X2y_inspections + total_complaints +
## X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X2y_noncomplaints + total_withOrders + X5y_withOrders + X5y_cc +
## X2y_cc + total_ccw + X5y_ccw + X2y_ccw, family = binomial,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0332 -0.9223 -0.4290 0.9560 2.2214
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.43153 2.61469 0.930 0.3524
## home_typeMunicipal -0.54425 0.28569 -1.905 0.0568 .
## home_typeNon-Profit 0.09104 0.24630 0.370 0.7117
## number_beds 0.22318 0.04841 4.610 4.02e-06 ***
## family_councilYes 0.14100 0.26358 0.535 0.5927
## accreditationYes -0.26201 0.27349 -0.958 0.3381
## regionEast -0.21343 0.31876 -0.670 0.5031
## regionNorth -0.94946 0.42105 -2.255 0.0241 *
## regionToronto 0.68619 0.47519 1.444 0.1487
## regionWest -0.47474 0.27996 -1.696 0.0899 .
## antipsychotic_percent 0.12357 0.10351 1.194 0.2326
## pain_percent -0.13585 0.09625 -1.411 0.1581
## total_inspections -6.65872 3.35491 -1.985 0.0472 *
## X5y_inspections 4.53577 3.41497 1.328 0.1841
## X2y_inspections -1.10037 1.15078 -0.956 0.3390
## total_complaints 1.22006 0.54252 2.249 0.0245 *
## X5y_critical 1.26720 0.96436 1.314 0.1888
## X2y_critical -0.94208 0.72653 -1.297 0.1947
## total_noncomplaints 3.22708 1.68026 1.921 0.0548 .
## X5y_noncomplaints -4.46926 2.23469 -2.000 0.0455 *
## X2y_noncomplaints 1.07155 0.74189 1.444 0.1486
## total_withOrders -0.22538 0.23203 -0.971 0.3314
## X5y_withOrders 0.77714 0.34108 2.278 0.0227 *
## X5y_cc -0.94166 1.06740 -0.882 0.3777
## X2y_cc 1.30188 0.94607 1.376 0.1688
## total_ccw 1.37564 1.35515 1.015 0.3100
## X5y_ccw -0.93057 1.17959 -0.789 0.4302
## X2y_ccw -0.74280 0.59771 -1.243 0.2140
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 697.87 on 587 degrees of freedom
## AIC: 753.87
##
## Number of Fisher Scoring iterations: 4
# Backward elimination step: drop X5y_ccw (p = 0.4302 in the preceding summary)
# and refit.
# NOTE(review): family_council (p = 0.5927) is retained despite a larger
# p-value -- confirm it is being kept in the model on purpose.
fit <- update(fit, . ~ . - X5y_ccw)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## accreditation + region + antipsychotic_percent + pain_percent +
## total_inspections + X5y_inspections + X2y_inspections + total_complaints +
## X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X2y_noncomplaints + total_withOrders + X5y_withOrders + X5y_cc +
## X2y_cc + total_ccw + X2y_ccw, family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0395 -0.9261 -0.4417 0.9620 2.2337
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.67816 2.59524 1.032 0.3021
## home_typeMunicipal -0.52996 0.28495 -1.860 0.0629 .
## home_typeNon-Profit 0.08109 0.24592 0.330 0.7416
## number_beds 0.22237 0.04830 4.604 4.15e-06 ***
## family_councilYes 0.16265 0.26192 0.621 0.5346
## accreditationYes -0.25758 0.27276 -0.944 0.3450
## regionEast -0.20441 0.31851 -0.642 0.5210
## regionNorth -0.92949 0.42029 -2.212 0.0270 *
## regionToronto 0.71512 0.47493 1.506 0.1321
## regionWest -0.48396 0.27963 -1.731 0.0835 .
## antipsychotic_percent 0.11826 0.10287 1.150 0.2503
## pain_percent -0.14055 0.09584 -1.467 0.1425
## total_inspections -5.94024 3.21699 -1.847 0.0648 .
## X5y_inspections 2.96074 2.74133 1.080 0.2801
## X2y_inspections -1.36052 1.10071 -1.236 0.2164
## total_complaints 1.22676 0.54218 2.263 0.0237 *
## X5y_critical 1.01173 0.89954 1.125 0.2607
## X2y_critical -1.10173 0.69904 -1.576 0.1150
## total_noncomplaints 3.11111 1.67023 1.863 0.0625 .
## X5y_noncomplaints -3.71594 1.99628 -1.861 0.0627 .
## X2y_noncomplaints 1.25966 0.70269 1.793 0.0730 .
## total_withOrders -0.19056 0.22776 -0.837 0.4028
## X5y_withOrders 0.63175 0.28416 2.223 0.0262 *
## X5y_cc -0.80003 1.04376 -0.766 0.4434
## X2y_cc 1.51841 0.90623 1.676 0.0938 .
## total_ccw 0.79024 1.12149 0.705 0.4810
## X2y_ccw -0.79088 0.59374 -1.332 0.1829
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 698.49 on 588 degrees of freedom
## AIC: 752.49
##
## Number of Fisher Scoring iterations: 4
# Backward elimination step: drop total_ccw (p = 0.4810 in the preceding
# summary) and refit.
# NOTE(review): family_council (p = 0.5346) is retained despite a larger
# p-value -- confirm it is being kept in the model on purpose.
fit <- update(fit, . ~ . - total_ccw)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## accreditation + region + antipsychotic_percent + pain_percent +
## total_inspections + X5y_inspections + X2y_inspections + total_complaints +
## X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X2y_noncomplaints + total_withOrders + X5y_withOrders + X5y_cc +
## X2y_cc + X2y_ccw, family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0018 -0.9220 -0.4382 0.9549 2.2129
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.80939 2.26973 0.797 0.4253
## home_typeMunicipal -0.52533 0.28506 -1.843 0.0653 .
## home_typeNon-Profit 0.06322 0.24423 0.259 0.7957
## number_beds 0.22766 0.04777 4.766 1.88e-06 ***
## family_councilYes 0.16338 0.26153 0.625 0.5322
## accreditationYes -0.25514 0.27228 -0.937 0.3487
## regionEast -0.20687 0.31817 -0.650 0.5156
## regionNorth -0.91735 0.42016 -2.183 0.0290 *
## regionToronto 0.67056 0.46894 1.430 0.1527
## regionWest -0.47206 0.27884 -1.693 0.0905 .
## antipsychotic_percent 0.12103 0.10281 1.177 0.2391
## pain_percent -0.13953 0.09562 -1.459 0.1445
## total_inspections -4.43748 2.39363 -1.854 0.0638 .
## X5y_inspections 2.94857 2.73987 1.076 0.2818
## X2y_inspections -1.17819 1.06654 -1.105 0.2693
## total_complaints 1.11110 0.51307 2.166 0.0303 *
## X5y_critical 1.11279 0.88943 1.251 0.2109
## X2y_critical -1.02568 0.68789 -1.491 0.1359
## total_noncomplaints 2.65824 1.53338 1.734 0.0830 .
## X5y_noncomplaints -3.86366 1.98644 -1.945 0.0518 .
## X2y_noncomplaints 1.12484 0.67410 1.669 0.0952 .
## total_withOrders -0.14713 0.21946 -0.670 0.5026
## X5y_withOrders 0.65681 0.28191 2.330 0.0198 *
## X5y_cc -0.81335 1.04359 -0.779 0.4358
## X2y_cc 1.37845 0.88297 1.561 0.1185
## X2y_ccw -0.72918 0.58671 -1.243 0.2139
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 698.99 on 589 degrees of freedom
## AIC: 750.99
##
## Number of Fisher Scoring iterations: 4
# Backward elimination step: drop total_withOrders (p = 0.5026 in the preceding
# summary) and refit.
# NOTE(review): family_council (p = 0.5322) is retained despite a larger
# p-value -- confirm it is being kept in the model on purpose.
fit <- update(fit, . ~ . - total_withOrders)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## accreditation + region + antipsychotic_percent + pain_percent +
## total_inspections + X5y_inspections + X2y_inspections + total_complaints +
## X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X2y_noncomplaints + X5y_withOrders + X5y_cc + X2y_cc + X2y_ccw,
## family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9696 -0.9239 -0.4421 0.9624 2.2042
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.04860 2.24243 0.914 0.36095
## home_typeMunicipal -0.51583 0.28446 -1.813 0.06977 .
## home_typeNon-Profit 0.05386 0.24395 0.221 0.82527
## number_beds 0.22821 0.04780 4.774 1.8e-06 ***
## family_councilYes 0.16993 0.26120 0.651 0.51533
## accreditationYes -0.25533 0.27227 -0.938 0.34835
## regionEast -0.21046 0.31779 -0.662 0.50780
## regionNorth -0.92606 0.42047 -2.202 0.02763 *
## regionToronto 0.66895 0.46926 1.426 0.15400
## regionWest -0.44010 0.27456 -1.603 0.10895
## antipsychotic_percent 0.11455 0.10231 1.120 0.26287
## pain_percent -0.13978 0.09573 -1.460 0.14426
## total_inspections -4.47687 2.39477 -1.869 0.06156 .
## X5y_inspections 2.94013 2.74932 1.069 0.28489
## X2y_inspections -1.15446 1.06732 -1.082 0.27941
## total_complaints 1.09651 0.51258 2.139 0.03242 *
## X5y_critical 1.12913 0.89243 1.265 0.20579
## X2y_critical -1.03111 0.69077 -1.493 0.13552
## total_noncomplaints 2.52636 1.52126 1.661 0.09677 .
## X5y_noncomplaints -3.82543 1.99178 -1.921 0.05478 .
## X2y_noncomplaints 1.12171 0.67575 1.660 0.09692 .
## X5y_withOrders 0.52769 0.20441 2.582 0.00984 **
## X5y_cc -0.77946 1.04659 -0.745 0.45641
## X2y_cc 1.35645 0.88432 1.534 0.12506
## X2y_ccw -0.71339 0.58634 -1.217 0.22373
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 699.44 on 590 degrees of freedom
## AIC: 749.44
##
## Number of Fisher Scoring iterations: 4
# Backward elimination step: drop X5y_cc (p = 0.4564 in the preceding summary)
# and refit.
# NOTE(review): family_council (p = 0.5153) is retained despite a larger
# p-value -- confirm it is being kept in the model on purpose.
fit <- update(fit, . ~ . - X5y_cc)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## accreditation + region + antipsychotic_percent + pain_percent +
## total_inspections + X5y_inspections + X2y_inspections + total_complaints +
## X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X2y_noncomplaints + X5y_withOrders + X2y_cc + X2y_ccw, family = binomial,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0194 -0.9314 -0.4577 0.9705 2.1533
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.36640 2.20105 1.075 0.2823
## home_typeMunicipal -0.52079 0.28438 -1.831 0.0671 .
## home_typeNon-Profit 0.05276 0.24394 0.216 0.8288
## number_beds 0.23033 0.04772 4.826 1.39e-06 ***
## family_councilYes 0.15988 0.26053 0.614 0.5394
## accreditationYes -0.25488 0.27202 -0.937 0.3488
## regionEast -0.19731 0.31702 -0.622 0.5337
## regionNorth -0.91479 0.41974 -2.179 0.0293 *
## regionToronto 0.65225 0.46847 1.392 0.1638
## regionWest -0.44717 0.27400 -1.632 0.1027
## antipsychotic_percent 0.11266 0.10223 1.102 0.2705
## pain_percent -0.13938 0.09582 -1.455 0.1458
## total_inspections -3.70803 2.14379 -1.730 0.0837 .
## X5y_inspections 1.02778 0.96551 1.064 0.2871
## X2y_inspections -0.79182 0.95036 -0.833 0.4047
## total_complaints 0.91716 0.44790 2.048 0.0406 *
## X5y_critical 0.54386 0.41390 1.314 0.1888
## X2y_critical -0.82836 0.63367 -1.307 0.1911
## total_noncomplaints 2.09217 1.39815 1.496 0.1346
## X5y_noncomplaints -2.57157 1.04837 -2.453 0.0142 *
## X2y_noncomplaints 0.95481 0.63643 1.500 0.1335
## X5y_withOrders 0.52460 0.20415 2.570 0.0102 *
## X2y_cc 1.06663 0.79609 1.340 0.1803
## X2y_ccw -0.69675 0.58615 -1.189 0.2346
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 699.99 on 591 degrees of freedom
## AIC: 747.99
##
## Number of Fisher Scoring iterations: 4
# Backward elimination step: drop X2y_inspections (p = 0.4047 in the preceding
# summary) and refit.
# NOTE(review): family_council (p = 0.5394) is retained despite a larger
# p-value -- confirm it is being kept in the model on purpose.
fit <- update(fit, . ~ . - X2y_inspections)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## accreditation + region + antipsychotic_percent + pain_percent +
## total_inspections + X5y_inspections + total_complaints +
## X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X2y_noncomplaints + X5y_withOrders + X2y_cc + X2y_ccw, family = binomial,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0210 -0.9361 -0.4460 0.9653 2.1777
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.91152 2.11370 1.377 0.16837
## home_typeMunicipal -0.50632 0.28305 -1.789 0.07365 .
## home_typeNon-Profit 0.07548 0.24215 0.312 0.75525
## number_beds 0.22687 0.04736 4.791 1.66e-06 ***
## family_councilYes 0.16354 0.26036 0.628 0.52992
## accreditationYes -0.25708 0.27212 -0.945 0.34480
## regionEast -0.18226 0.31628 -0.576 0.56445
## regionNorth -0.91243 0.41784 -2.184 0.02899 *
## regionToronto 0.69147 0.46902 1.474 0.14041
## regionWest -0.44141 0.27375 -1.612 0.10687
## antipsychotic_percent 0.11375 0.10253 1.109 0.26726
## pain_percent -0.13891 0.09602 -1.447 0.14797
## total_inspections -4.21445 2.06271 -2.043 0.04104 *
## X5y_inspections 0.92425 0.95695 0.966 0.33413
## total_complaints 1.02301 0.43169 2.370 0.01780 *
## X5y_critical 0.61442 0.40544 1.515 0.12966
## X2y_critical -0.58086 0.55575 -1.045 0.29593
## total_noncomplaints 2.38843 1.35409 1.764 0.07776 .
## X5y_noncomplaints -2.60186 1.04965 -2.479 0.01318 *
## X2y_noncomplaints 0.59879 0.46588 1.285 0.19869
## X5y_withOrders 0.55289 0.20127 2.747 0.00601 **
## X2y_cc 0.70909 0.66439 1.067 0.28584
## X2y_ccw -0.78258 0.57518 -1.361 0.17364
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 700.68 on 592 degrees of freedom
## AIC: 746.68
##
## Number of Fisher Scoring iterations: 4
# Backward elimination step: drop accreditation (p = 0.3448 in the preceding
# summary) and refit.
# NOTE(review): family_council (p = 0.5299) is retained despite a larger
# p-value -- confirm it is being kept in the model on purpose.
fit <- update(fit, . ~ . - accreditation)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## region + antipsychotic_percent + pain_percent + total_inspections +
## X5y_inspections + total_complaints + X5y_critical + X2y_critical +
## total_noncomplaints + X5y_noncomplaints + X2y_noncomplaints +
## X5y_withOrders + X2y_cc + X2y_ccw, family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0428 -0.9386 -0.4578 0.9689 2.1399
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.89085 2.11416 1.367 0.17151
## home_typeMunicipal -0.44474 0.27573 -1.613 0.10675
## home_typeNon-Profit 0.14486 0.23062 0.628 0.52992
## number_beds 0.22090 0.04684 4.716 2.41e-06 ***
## family_councilYes 0.13525 0.25756 0.525 0.59950
## regionEast -0.18854 0.31584 -0.597 0.55053
## regionNorth -0.92557 0.41761 -2.216 0.02667 *
## regionToronto 0.68540 0.46935 1.460 0.14420
## regionWest -0.41931 0.27233 -1.540 0.12363
## antipsychotic_percent 0.11407 0.10216 1.117 0.26417
## pain_percent -0.13908 0.09579 -1.452 0.14651
## total_inspections -4.35228 2.05768 -2.115 0.03442 *
## X5y_inspections 0.94249 0.95518 0.987 0.32378
## total_complaints 1.04487 0.43090 2.425 0.01531 *
## X5y_critical 0.63273 0.40446 1.564 0.11773
## X2y_critical -0.59673 0.55544 -1.074 0.28267
## total_noncomplaints 2.47423 1.35108 1.831 0.06706 .
## X5y_noncomplaints -2.62914 1.04882 -2.507 0.01218 *
## X2y_noncomplaints 0.61488 0.46571 1.320 0.18673
## X5y_withOrders 0.55432 0.20068 2.762 0.00574 **
## X2y_cc 0.69907 0.66311 1.054 0.29177
## X2y_ccw -0.78554 0.57386 -1.369 0.17104
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 701.57 on 593 degrees of freedom
## AIC: 745.57
##
## Number of Fisher Scoring iterations: 4
# Backward elimination step: drop X5y_inspections (p = 0.3238 in the preceding
# summary) and refit.
# NOTE(review): family_council (p = 0.5995) is retained despite a larger
# p-value -- confirm it is being kept in the model on purpose.
fit <- update(fit, . ~ . - X5y_inspections)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## region + antipsychotic_percent + pain_percent + total_inspections +
## total_complaints + X5y_critical + X2y_critical + total_noncomplaints +
## X5y_noncomplaints + X2y_noncomplaints + X5y_withOrders +
## X2y_cc + X2y_ccw, family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9985 -0.9303 -0.4649 0.9575 2.1177
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.99094 2.10654 1.420 0.15566
## home_typeMunicipal -0.42710 0.27481 -1.554 0.12014
## home_typeNon-Profit 0.15064 0.23037 0.654 0.51317
## number_beds 0.21747 0.04663 4.664 3.1e-06 ***
## family_councilYes 0.12962 0.25704 0.504 0.61407
## regionEast -0.16847 0.31506 -0.535 0.59285
## regionNorth -0.91004 0.41719 -2.181 0.02916 *
## regionToronto 0.63731 0.46520 1.370 0.17070
## regionWest -0.39425 0.27099 -1.455 0.14572
## antipsychotic_percent 0.11458 0.10199 1.123 0.26127
## pain_percent -0.13181 0.09541 -1.381 0.16714
## total_inspections -3.90122 2.00008 -1.951 0.05111 .
## total_complaints 1.04607 0.42982 2.434 0.01494 *
## X5y_critical 0.70282 0.39941 1.760 0.07847 .
## X2y_critical -0.77999 0.52457 -1.487 0.13704
## total_noncomplaints 2.14551 1.30506 1.644 0.10018
## X5y_noncomplaints -1.93897 0.78059 -2.484 0.01299 *
## X2y_noncomplaints 0.57237 0.46430 1.233 0.21766
## X5y_withOrders 0.57058 0.19977 2.856 0.00429 **
## X2y_cc 0.91083 0.62842 1.449 0.14722
## X2y_ccw -0.78355 0.57421 -1.365 0.17239
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 702.55 on 594 degrees of freedom
## AIC: 744.55
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop antipsychotic_percent (p = 0.261 in the preceding summary) and refit
fit <- update(fit, . ~ . - antipsychotic_percent)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## region + pain_percent + total_inspections + total_complaints +
## X5y_critical + X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X2y_noncomplaints + X5y_withOrders + X2y_cc + X2y_ccw, family = binomial,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9797 -0.9334 -0.4462 0.9671 2.0869
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.50303 2.05956 1.701 0.08897 .
## home_typeMunicipal -0.43040 0.27433 -1.569 0.11666
## home_typeNon-Profit 0.13309 0.22975 0.579 0.56240
## number_beds 0.21918 0.04657 4.707 2.52e-06 ***
## family_councilYes 0.13572 0.25630 0.530 0.59643
## regionEast -0.16780 0.31541 -0.532 0.59473
## regionNorth -0.89599 0.41596 -2.154 0.03124 *
## regionToronto 0.64095 0.46432 1.380 0.16746
## regionWest -0.37520 0.27017 -1.389 0.16491
## pain_percent -0.12152 0.09488 -1.281 0.20030
## total_inspections -3.95774 2.00034 -1.979 0.04787 *
## total_complaints 1.05673 0.43007 2.457 0.01401 *
## X5y_critical 0.68811 0.39908 1.724 0.08467 .
## X2y_critical -0.82278 0.52237 -1.575 0.11524
## total_noncomplaints 2.12001 1.30545 1.624 0.10438
## X5y_noncomplaints -1.89222 0.78040 -2.425 0.01532 *
## X2y_noncomplaints 0.62045 0.46180 1.344 0.17909
## X5y_withOrders 0.57288 0.19985 2.867 0.00415 **
## X2y_cc 0.95309 0.62560 1.523 0.12764
## X2y_ccw -0.80860 0.57235 -1.413 0.15772
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 703.82 on 595 degrees of freedom
## AIC: 743.82
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop pain_percent (p = 0.200 in the preceding summary) and refit
fit <- update(fit, . ~ . - pain_percent)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## region + total_inspections + total_complaints + X5y_critical +
## X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X2y_noncomplaints + X5y_withOrders + X2y_cc + X2y_ccw, family = binomial,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0027 -0.9297 -0.4657 0.9559 2.0742
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.09072 2.03063 1.522 0.12800
## home_typeMunicipal -0.45334 0.27324 -1.659 0.09709 .
## home_typeNon-Profit 0.12486 0.22930 0.545 0.58607
## number_beds 0.22697 0.04612 4.921 8.59e-07 ***
## family_councilYes 0.10801 0.25521 0.423 0.67212
## regionEast -0.20528 0.31306 -0.656 0.51200
## regionNorth -1.00268 0.40810 -2.457 0.01401 *
## regionToronto 0.65151 0.46432 1.403 0.16057
## regionWest -0.42788 0.26659 -1.605 0.10848
## total_inspections -3.79647 1.99121 -1.907 0.05657 .
## total_complaints 1.03657 0.42953 2.413 0.01581 *
## X5y_critical 0.66998 0.39873 1.680 0.09290 .
## X2y_critical -0.81205 0.52060 -1.560 0.11880
## total_noncomplaints 2.04161 1.29950 1.571 0.11617
## X5y_noncomplaints -1.93381 0.77803 -2.486 0.01294 *
## X2y_noncomplaints 0.64394 0.45948 1.401 0.16108
## X5y_withOrders 0.57761 0.19985 2.890 0.00385 **
## X2y_cc 0.98048 0.62390 1.572 0.11606
## X2y_ccw -0.85041 0.57074 -1.490 0.13622
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 705.47 on 596 degrees of freedom
## AIC: 743.47
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_noncomplaints (p = 0.161 in the preceding summary) and refit
fit <- update(fit, . ~ . - X2y_noncomplaints)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## region + total_inspections + total_complaints + X5y_critical +
## X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X5y_withOrders + X2y_cc + X2y_ccw, family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0273 -0.9286 -0.4827 0.9758 2.1461
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.82809 2.01300 1.405 0.1600
## home_typeMunicipal -0.43404 0.27212 -1.595 0.1107
## home_typeNon-Profit 0.13610 0.22849 0.596 0.5514
## number_beds 0.22667 0.04599 4.929 8.28e-07 ***
## family_councilYes 0.10265 0.25497 0.403 0.6872
## regionEast -0.23724 0.31170 -0.761 0.4466
## regionNorth -0.94425 0.40530 -2.330 0.0198 *
## regionToronto 0.62598 0.46420 1.349 0.1775
## regionWest -0.42757 0.26584 -1.608 0.1078
## total_inspections -4.00644 1.98624 -2.017 0.0437 *
## total_complaints 1.06292 0.42950 2.475 0.0133 *
## X5y_critical 0.52248 0.38282 1.365 0.1723
## X2y_critical -0.33414 0.39207 -0.852 0.3941
## total_noncomplaints 2.23947 1.29320 1.732 0.0833 .
## X5y_noncomplaints -1.46195 0.69526 -2.103 0.0355 *
## X5y_withOrders 0.52105 0.19486 2.674 0.0075 **
## X2y_cc 0.52085 0.52998 0.983 0.3257
## X2y_ccw -0.36546 0.45281 -0.807 0.4196
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 707.46 on 597 degrees of freedom
## AIC: 743.46
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_ccw (p = 0.420 in the preceding summary) and refit
fit <- update(fit, . ~ . - X2y_ccw)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## region + total_inspections + total_complaints + X5y_critical +
## X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X5y_withOrders + X2y_cc, family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0287 -0.9291 -0.4727 0.9705 2.1238
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.87118 2.00789 1.430 0.1527
## home_typeMunicipal -0.44120 0.27155 -1.625 0.1042
## home_typeNon-Profit 0.12881 0.22844 0.564 0.5728
## number_beds 0.22809 0.04596 4.963 6.96e-07 ***
## family_councilYes 0.10866 0.25437 0.427 0.6693
## regionEast -0.21321 0.31018 -0.687 0.4919
## regionNorth -0.91458 0.40280 -2.271 0.0232 *
## regionToronto 0.60258 0.46302 1.301 0.1931
## regionWest -0.41806 0.26532 -1.576 0.1151
## total_inspections -4.01902 1.98189 -2.028 0.0426 *
## total_complaints 1.06613 0.42814 2.490 0.0128 *
## X5y_critical 0.56047 0.37967 1.476 0.1399
## X2y_critical -0.37373 0.38919 -0.960 0.3369
## total_noncomplaints 2.24408 1.29159 1.737 0.0823 .
## X5y_noncomplaints -1.48109 0.69475 -2.132 0.0330 *
## X5y_withOrders 0.43435 0.16217 2.678 0.0074 **
## X2y_cc 0.16527 0.29230 0.565 0.5718
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 708.11 on 598 degrees of freedom
## AIC: 742.11
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_cc (p = 0.572 in the preceding summary) and refit
fit <- update(fit, . ~ . - X2y_cc)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## region + total_inspections + total_complaints + X5y_critical +
## X2y_critical + total_noncomplaints + X5y_noncomplaints +
## X5y_withOrders, family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0514 -0.9323 -0.4752 0.9748 2.1478
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.95484 2.00303 1.475 0.14016
## home_typeMunicipal -0.44372 0.27165 -1.633 0.10237
## home_typeNon-Profit 0.13273 0.22808 0.582 0.56060
## number_beds 0.23267 0.04527 5.140 2.75e-07 ***
## family_councilYes 0.11070 0.25446 0.435 0.66354
## regionEast -0.18458 0.30571 -0.604 0.54600
## regionNorth -0.90698 0.40225 -2.255 0.02415 *
## regionToronto 0.58071 0.46084 1.260 0.20763
## regionWest -0.41830 0.26547 -1.576 0.11509
## total_inspections -4.01728 1.98229 -2.027 0.04270 *
## total_complaints 1.10315 0.42343 2.605 0.00918 **
## X5y_critical 0.57081 0.37879 1.507 0.13183
## X2y_critical -0.23330 0.29942 -0.779 0.43588
## total_noncomplaints 2.19030 1.28801 1.701 0.08903 .
## X5y_noncomplaints -1.48421 0.69472 -2.136 0.03265 *
## X5y_withOrders 0.44466 0.16127 2.757 0.00583 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 708.43 on 599 degrees of freedom
## AIC: 740.43
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_critical (p = 0.436 in the preceding summary) and refit
fit <- update(fit, . ~ . - X2y_critical)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## region + total_inspections + total_complaints + X5y_critical +
## total_noncomplaints + X5y_noncomplaints + X5y_withOrders,
## family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0631 -0.9339 -0.4628 0.9761 2.1563
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.87600 1.99363 1.443 0.14914
## home_typeMunicipal -0.45309 0.27132 -1.670 0.09493 .
## home_typeNon-Profit 0.12590 0.22767 0.553 0.58026
## number_beds 0.23159 0.04523 5.121 3.04e-07 ***
## family_councilYes 0.11552 0.25448 0.454 0.64987
## regionEast -0.12607 0.29631 -0.425 0.67051
## regionNorth -0.87346 0.40002 -2.184 0.02900 *
## regionToronto 0.59382 0.46095 1.288 0.19767
## regionWest -0.39703 0.26401 -1.504 0.13263
## total_inspections -4.14821 1.97248 -2.103 0.03546 *
## total_complaints 1.13624 0.42088 2.700 0.00694 **
## X5y_critical 0.39421 0.30216 1.305 0.19201
## total_noncomplaints 2.28821 1.28094 1.786 0.07404 .
## X5y_noncomplaints -1.44184 0.69191 -2.084 0.03717 *
## X5y_withOrders 0.43556 0.16056 2.713 0.00667 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 709.04 on 600 degrees of freedom
## AIC: 739.04
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_critical (p = 0.192 in the preceding summary) and refit
fit <- update(fit, . ~ . - X5y_critical)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## region + total_inspections + total_complaints + total_noncomplaints +
## X5y_noncomplaints + X5y_withOrders, family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0427 -0.9397 -0.4745 0.9736 2.1326
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.17270 1.91988 1.132 0.25777
## home_typeMunicipal -0.44868 0.27055 -1.658 0.09723 .
## home_typeNon-Profit 0.11171 0.22680 0.493 0.62232
## number_beds 0.24547 0.04404 5.574 2.49e-08 ***
## family_councilYes 0.12014 0.25414 0.473 0.63641
## regionEast -0.10225 0.29512 -0.346 0.72899
## regionNorth -0.87908 0.39953 -2.200 0.02779 *
## regionToronto 0.59456 0.45960 1.294 0.19579
## regionWest -0.38521 0.26329 -1.463 0.14345
## total_inspections -4.44497 1.95868 -2.269 0.02325 *
## total_complaints 1.20621 0.41784 2.887 0.00389 **
## total_noncomplaints 2.55459 1.26365 2.022 0.04322 *
## X5y_noncomplaints -0.75877 0.45154 -1.680 0.09288 .
## X5y_withOrders 0.31796 0.13232 2.403 0.01626 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 710.76 on 601 degrees of freedom
## AIC: 738.76
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_noncomplaints (p = 0.093 in the preceding summary) and refit
fit <- update(fit, . ~ . - X5y_noncomplaints)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## region + total_inspections + total_complaints + total_noncomplaints +
## X5y_withOrders, family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0095 -0.9504 -0.4767 0.9919 2.1132
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.88897 1.91095 0.988 0.32291
## home_typeMunicipal -0.44455 0.27019 -1.645 0.09991 .
## home_typeNon-Profit 0.12092 0.22553 0.536 0.59186
## number_beds 0.23716 0.04348 5.455 4.91e-08 ***
## family_councilYes 0.13028 0.25404 0.513 0.60807
## regionEast -0.22078 0.28461 -0.776 0.43791
## regionNorth -0.93325 0.39813 -2.344 0.01908 *
## regionToronto 0.52876 0.45531 1.161 0.24551
## regionWest -0.37431 0.26198 -1.429 0.15306
## total_inspections -4.36075 1.95984 -2.225 0.02608 *
## total_complaints 1.19345 0.41824 2.854 0.00432 **
## total_noncomplaints 2.04527 1.22706 1.667 0.09555 .
## X5y_withOrders 0.21369 0.11596 1.843 0.06536 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 713.60 on 602 degrees of freedom
## AIC: 739.6
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop total_noncomplaints (p = 0.096 in the preceding summary) and refit
fit <- update(fit, . ~ . - total_noncomplaints)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## region + total_inspections + total_complaints + X5y_withOrders,
## family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9451 -0.9550 -0.4645 0.9864 2.0832
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.66077 1.12918 -0.585 0.55843
## home_typeMunicipal -0.44994 0.26975 -1.668 0.09531 .
## home_typeNon-Profit 0.15183 0.22511 0.674 0.50002
## number_beds 0.23448 0.04330 5.416 6.1e-08 ***
## family_councilYes 0.09182 0.25283 0.363 0.71647
## regionEast -0.19619 0.28466 -0.689 0.49070
## regionNorth -0.88085 0.39757 -2.216 0.02672 *
## regionToronto 0.57336 0.46356 1.237 0.21614
## regionWest -0.33647 0.26206 -1.284 0.19916
## total_inspections -1.22701 0.50821 -2.414 0.01576 *
## total_complaints 0.58166 0.18416 3.158 0.00159 **
## X5y_withOrders 0.21870 0.11480 1.905 0.05677 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 716.44 on 603 degrees of freedom
## AIC: 740.44
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_withOrders (p = 0.057 in the preceding summary) and refit
fit <- update(fit, . ~ . - X5y_withOrders)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## region + total_inspections + total_complaints, family = binomial,
## data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0188 -0.9600 -0.4620 0.9789 2.0712
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.21539 1.08491 -1.120 0.2626
## home_typeMunicipal -0.48239 0.26858 -1.796 0.0725 .
## home_typeNon-Profit 0.19029 0.22352 0.851 0.3946
## number_beds 0.23040 0.04295 5.364 8.13e-08 ***
## family_councilYes 0.11959 0.25154 0.475 0.6345
## regionEast -0.29927 0.27835 -1.075 0.2823
## regionNorth -0.79876 0.39239 -2.036 0.0418 *
## regionToronto 0.47976 0.45974 1.044 0.2967
## regionWest -0.41542 0.25858 -1.607 0.1082
## total_inspections -0.89250 0.47219 -1.890 0.0587 .
## total_complaints 0.54281 0.18160 2.989 0.0028 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 720.12 on 604 degrees of freedom
## AIC: 742.12
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop total_inspections (p = 0.059 in the preceding summary) and refit
fit <- update(fit, . ~ . - total_inspections)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + family_council +
## region + total_complaints, family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.9936 -0.9639 -0.4442 0.9950 2.0009
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.07844 0.47856 -6.433 1.25e-10 ***
## home_typeMunicipal -0.51366 0.26651 -1.927 0.05393 .
## home_typeNon-Profit 0.20671 0.22315 0.926 0.35429
## number_beds 0.22264 0.04264 5.221 1.78e-07 ***
## family_councilYes 0.11749 0.25168 0.467 0.64062
## regionEast -0.45305 0.26586 -1.704 0.08837 .
## regionNorth -0.99635 0.38084 -2.616 0.00889 **
## regionToronto 0.60953 0.45451 1.341 0.17989
## regionWest -0.51509 0.25338 -2.033 0.04206 *
## total_complaints 0.24279 0.08404 2.889 0.00387 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 723.78 on 605 degrees of freedom
## AIC: 743.78
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop the home_type factor, i.e. both of its dummy terms
# (p = 0.054 for Municipal and 0.354 for Non-Profit in the preceding summary), and refit
fit <- update(fit, . ~ . - home_type)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ number_beds + family_council + region +
## total_complaints, family = binomial, data = data)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.0172 -0.9768 -0.4551 1.0108 2.0083
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.85512 0.46241 -6.174 6.64e-10 ***
## number_beds 0.19801 0.03993 4.959 7.07e-07 ***
## family_councilYes 0.10140 0.25050 0.405 0.68561
## regionEast -0.50000 0.26405 -1.894 0.05828 .
## regionNorth -1.03173 0.37660 -2.740 0.00615 **
## regionToronto 0.67297 0.45206 1.489 0.13657
## regionWest -0.57752 0.25091 -2.302 0.02135 *
## total_complaints 0.25978 0.08097 3.208 0.00134 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 729.86 on 607 degrees of freedom
## AIC: 745.86
##
## Number of Fisher Scoring iterations: 4
As shown above, when all variables are included in the initial regression and backward elimination is then used for variable selection, the significant variables are number of beds, total complaints and region. (family_council is still present in the final model but is not significant, p = 0.69.)
# Calculate McFadden's pseudo R2: (ll.null - ll.proposed) / ll.null.
# Both log-likelihoods are taken as deviance / -2 from the values stored on the fit.
ll.null <- fit$null.deviance / -2
ll.proposed <- fit$deviance / -2
print((ll.null - ll.proposed) / ll.null)
## [1] 0.1423571
# Likelihood-ratio test of the fitted model against the intercept-only model.
# Request the upper tail directly: `1 - pchisq(...)` cancels to exactly 0 once the
# p-value falls below machine precision (which is why the earlier render printed 0),
# whereas lower.tail = FALSE keeps the actual magnitude. Re-run to refresh the output.
pchisq(
  2 * (ll.proposed - ll.null),
  df = length(fit$coefficients) - 1,
  lower.tail = FALSE
) # p-value
## [1] 0
# Pair each row's fitted outbreak probability with its observed outbreak status
predicted.data <- data.frame(
  probability.of.outbreak = fit$fitted.values,
  outbreak = data$outbreak
)
# Sort rows from lowest to highest fitted probability
predicted.data <- predicted.data[
  order(predicted.data$probability.of.outbreak, decreasing = FALSE),
]
# Rank = position in the sorted order. seq_len() yields an empty vector for a
# zero-row frame, whereas 1:nrow() would yield c(1, 0) and fail on assignment.
predicted.data$rank <- seq_len(nrow(predicted.data))
library(ggplot2)
library(cowplot)
##
## ********************************************************
## Note: As of version 1.0.0, cowplot does not change the
## default ggplot2 theme anymore. To recover the previous
## behavior, execute:
## theme_set(theme_cowplot())
## ********************************************************
# Plot fitted probabilities in rank order; each point is coloured by observed outbreak status
ggplot(predicted.data, aes(x = rank, y = probability.of.outbreak)) +
  geom_point(aes(color = outbreak), shape = 4, stroke = 1, alpha = 0.5) +
  labs(
    x = "Index",
    y = "Predicted probability of an outbreak",
    title = "Outbreak Status Ordered By Predicted Probability of an Outbreak"
  ) +
  scale_color_manual(values = c("blue", "red"))
# Keep the outcome, home type and bed count plus the columns named by
# noncomplaints, complaints and withOrders (presumably character vectors of
# column names defined earlier in the file), then summarise the reduced frame.
data_selected <- subset(
  df,
  select = c("outbreak", "home_type", "number_beds", noncomplaints, complaints, withOrders)
)
skim(data_selected)
| Name | data_selected |
| Number of rows | 615 |
| Number of columns | 12 |
| _______________________ | |
| Column type frequency: | |
| factor | 2 |
| numeric | 10 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| outbreak | 0 | 1 | FALSE | 2 | no: 323, yes: 292 |
| home_type | 0 | 1 | FALSE | 3 | For: 351, Non: 164, Mun: 100 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| number_beds | 0 | 1 | 127.27 | 73.83 | 12 | 69 | 120 | 160 | 543 | ▇▇▁▁▁ |
| total_noncomplaints | 0 | 1 | 22.98 | 12.09 | 5 | 15 | 20 | 28 | 90 | ▇▃▁▁▁ |
| X5y_noncomplaints | 0 | 1 | 13.23 | 7.11 | 4 | 9 | 11 | 15 | 68 | ▇▂▁▁▁ |
| X2y_noncomplaints | 0 | 1 | 6.00 | 3.64 | 0 | 4 | 5 | 8 | 40 | ▇▂▁▁▁ |
| total_complaints | 0 | 1 | 14.70 | 11.64 | 0 | 7 | 12 | 20 | 94 | ▇▂▁▁▁ |
| X5y_complaints | 0 | 1 | 6.60 | 5.93 | 0 | 2 | 5 | 9 | 46 | ▇▂▁▁▁ |
| X2y_complaints | 0 | 1 | 3.20 | 2.86 | 0 | 1 | 3 | 5 | 20 | ▇▂▁▁▁ |
| total_withOrders | 0 | 1 | 6.88 | 6.17 | 0 | 3 | 5 | 9 | 43 | ▇▂▁▁▁ |
| X5y_withOrders | 0 | 1 | 4.41 | 4.35 | 0 | 2 | 3 | 6 | 34 | ▇▂▁▁▁ |
| X2y_withOrders | 0 | 1 | 1.85 | 2.20 | 0 | 0 | 1 | 3 | 16 | ▇▁▁▁▁ |
# Full logistic regression: outbreak against every other column of data_selected
fit <- glm(outbreak ~ ., family = binomial, data = data_selected)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ ., family = binomial, data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4042 -0.9564 -0.6169 1.0565 1.9789
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.552933 0.267264 -5.810 6.23e-09 ***
## home_typeMunicipal -0.551167 0.270037 -2.041 0.0412 *
## home_typeNon-Profit 0.155759 0.216670 0.719 0.4722
## number_beds 0.011545 0.001887 6.119 9.42e-10 ***
## total_noncomplaints -0.018206 0.019458 -0.936 0.3495
## X5y_noncomplaints -0.036013 0.043604 -0.826 0.4089
## X2y_noncomplaints 0.021829 0.062826 0.347 0.7282
## total_complaints 0.036504 0.020229 1.805 0.0711 .
## X5y_complaints 0.015913 0.047024 0.338 0.7351
## X2y_complaints 0.019416 0.062150 0.312 0.7547
## total_withOrders -0.027825 0.039733 -0.700 0.4837
## X5y_withOrders 0.083603 0.075386 1.109 0.2674
## X2y_withOrders -0.024768 0.095839 -0.258 0.7961
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 730.55 on 602 degrees of freedom
## AIC: 756.55
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_withOrders (p = 0.796 in the preceding summary) and refit
fit <- update(fit, . ~ . - X2y_withOrders)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_noncomplaints +
## X5y_noncomplaints + X2y_noncomplaints + total_complaints +
## X5y_complaints + X2y_complaints + total_withOrders + X5y_withOrders,
## family = binomial, data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4122 -0.9625 -0.6219 1.0537 1.9714
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.555533 0.267159 -5.822 5.80e-09 ***
## home_typeMunicipal -0.551354 0.270000 -2.042 0.0411 *
## home_typeNon-Profit 0.154674 0.216629 0.714 0.4752
## number_beds 0.011544 0.001886 6.121 9.32e-10 ***
## total_noncomplaints -0.018217 0.019449 -0.937 0.3489
## X5y_noncomplaints -0.030634 0.038328 -0.799 0.4241
## X2y_noncomplaints 0.011411 0.048182 0.237 0.8128
## total_complaints 0.036240 0.020193 1.795 0.0727 .
## X5y_complaints 0.015795 0.047012 0.336 0.7369
## X2y_complaints 0.018421 0.061941 0.297 0.7662
## total_withOrders -0.026402 0.039319 -0.671 0.5019
## X5y_withOrders 0.071508 0.059097 1.210 0.2263
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 730.61 on 603 degrees of freedom
## AIC: 754.61
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_complaints (p = 0.766 in the preceding summary) and refit
fit <- update(fit, . ~ . - X2y_complaints)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_noncomplaints +
## X5y_noncomplaints + X2y_noncomplaints + total_complaints +
## X5y_complaints + total_withOrders + X5y_withOrders, family = binomial,
## data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4176 -0.9554 -0.6266 1.0543 1.9748
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.54653 0.26542 -5.827 5.65e-09 ***
## home_typeMunicipal -0.55716 0.26942 -2.068 0.0386 *
## home_typeNon-Profit 0.15254 0.21643 0.705 0.4809
## number_beds 0.01166 0.00185 6.299 3.00e-10 ***
## total_noncomplaints -0.01909 0.01925 -0.992 0.3213
## X5y_noncomplaints -0.03146 0.03821 -0.823 0.4103
## X2y_noncomplaints 0.01450 0.04691 0.309 0.7572
## total_complaints 0.03569 0.02011 1.775 0.0759 .
## X5y_complaints 0.02435 0.03737 0.651 0.5148
## total_withOrders -0.02549 0.03918 -0.651 0.5153
## X5y_withOrders 0.07052 0.05900 1.195 0.2320
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 730.70 on 604 degrees of freedom
## AIC: 752.7
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X2y_noncomplaints (p = 0.757 in the preceding summary) and refit
fit <- update(fit, . ~ . - X2y_noncomplaints)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_noncomplaints +
## X5y_noncomplaints + total_complaints + X5y_complaints + total_withOrders +
## X5y_withOrders, family = binomial, data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4284 -0.9502 -0.6204 1.0552 1.9600
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.550158 0.265675 -5.835 5.39e-09 ***
## home_typeMunicipal -0.551389 0.268796 -2.051 0.0402 *
## home_typeNon-Profit 0.156706 0.215950 0.726 0.4680
## number_beds 0.011715 0.001842 6.359 2.04e-10 ***
## total_noncomplaints -0.018734 0.019214 -0.975 0.3295
## X5y_noncomplaints -0.025634 0.033244 -0.771 0.4407
## total_complaints 0.036176 0.020051 1.804 0.0712 .
## X5y_complaints 0.022213 0.036715 0.605 0.5452
## total_withOrders -0.027152 0.038859 -0.699 0.4847
## X5y_withOrders 0.073765 0.058112 1.269 0.2043
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 730.80 on 605 degrees of freedom
## AIC: 750.8
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop total_withOrders (p = 0.485 in the preceding summary) and refit
fit <- update(fit, . ~ . - total_withOrders)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_noncomplaints +
## X5y_noncomplaints + total_complaints + X5y_complaints + X5y_withOrders,
## family = binomial, data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5331 -0.9615 -0.6241 1.0680 1.9709
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.546460 0.266125 -5.811 6.21e-09 ***
## home_typeMunicipal -0.546342 0.268538 -2.035 0.0419 *
## home_typeNon-Profit 0.145870 0.215560 0.677 0.4986
## number_beds 0.011721 0.001843 6.358 2.04e-10 ***
## total_noncomplaints -0.023973 0.017653 -1.358 0.1745
## X5y_noncomplaints -0.019374 0.032015 -0.605 0.5451
## total_complaints 0.033372 0.019554 1.707 0.0879 .
## X5y_complaints 0.028112 0.035697 0.788 0.4310
## X5y_withOrders 0.040192 0.032489 1.237 0.2160
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 731.29 on 606 degrees of freedom
## AIC: 749.29
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_noncomplaints (p = 0.545 in the preceding summary) and refit
fit <- update(fit, . ~ . - X5y_noncomplaints)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_noncomplaints +
## total_complaints + X5y_complaints + X5y_withOrders, family = binomial,
## data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5263 -0.9616 -0.6166 1.0543 1.9948
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.58272 0.25879 -6.116 9.61e-10 ***
## home_typeMunicipal -0.54990 0.26865 -2.047 0.04067 *
## home_typeNon-Profit 0.14367 0.21535 0.667 0.50467
## number_beds 0.01168 0.00184 6.348 2.17e-10 ***
## total_noncomplaints -0.03167 0.01227 -2.581 0.00986 **
## total_complaints 0.03768 0.01830 2.059 0.03948 *
## X5y_complaints 0.01949 0.03270 0.596 0.55112
## X5y_withOrders 0.03018 0.02789 1.082 0.27932
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 731.66 on 607 degrees of freedom
## AIC: 747.66
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_complaints (p = 0.551 in the preceding summary) and refit
fit <- update(fit, . ~ . - X5y_complaints)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_noncomplaints +
## total_complaints + X5y_withOrders, family = binomial, data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5648 -0.9623 -0.6222 1.0539 1.9893
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.600855 0.256928 -6.231 4.64e-10 ***
## home_typeMunicipal -0.541607 0.267981 -2.021 0.04327 *
## home_typeNon-Profit 0.146247 0.215209 0.680 0.49678
## number_beds 0.011743 0.001836 6.397 1.58e-10 ***
## total_noncomplaints -0.030982 0.012173 -2.545 0.01093 *
## total_complaints 0.045207 0.013412 3.371 0.00075 ***
## X5y_withOrders 0.032378 0.027472 1.179 0.23856
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 732.01 on 608 degrees of freedom
## AIC: 746.01
##
## Number of Fisher Scoring iterations: 4
# Backward elimination: drop X5y_withOrders (p = 0.239 in the preceding summary) and refit
fit <- update(fit, . ~ . - X5y_withOrders)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_noncomplaints +
## total_complaints, family = binomial, data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5854 -0.9661 -0.6290 1.0365 1.9760
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.60829 0.25716 -6.254 4.00e-10 ***
## home_typeMunicipal -0.56603 0.26722 -2.118 0.03416 *
## home_typeNon-Profit 0.16650 0.21424 0.777 0.43707
## number_beds 0.01174 0.00183 6.415 1.41e-10 ***
## total_noncomplaints -0.02500 0.01113 -2.246 0.02468 *
## total_complaints 0.04584 0.01344 3.410 0.00065 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 733.42 on 609 degrees of freedom
## AIC: 745.42
##
## Number of Fisher Scoring iterations: 4
# Rebuild the modelling frame using the columns named by all_inspections in place
# of the noncomplaint columns (all_inspections, complaints and withOrders are
# presumably character vectors of column names defined earlier in the file),
# then summarise it.
data_selected <- subset(
  df,
  select = c("outbreak", "home_type", "number_beds", all_inspections, complaints, withOrders)
)
skim(data_selected)
| Name | data_selected |
| Number of rows | 615 |
| Number of columns | 12 |
| _______________________ | |
| Column type frequency: | |
| factor | 2 |
| numeric | 10 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| outbreak | 0 | 1 | FALSE | 2 | no: 323, yes: 292 |
| home_type | 0 | 1 | FALSE | 3 | For: 351, Non: 164, Mun: 100 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| number_beds | 0 | 1 | 127.27 | 73.83 | 12 | 69 | 120 | 160 | 543 | ▇▇▁▁▁ |
| total_inspections | 0 | 1 | 37.68 | 21.95 | 9 | 22 | 32 | 47 | 172 | ▇▃▁▁▁ |
| X5y_inspections | 0 | 1 | 19.82 | 11.95 | 5 | 12 | 17 | 24 | 86 | ▇▃▁▁▁ |
| X2y_inspections | 0 | 1 | 9.20 | 5.59 | 1 | 5 | 8 | 12 | 44 | ▇▅▁▁▁ |
| total_complaints | 0 | 1 | 14.70 | 11.64 | 0 | 7 | 12 | 20 | 94 | ▇▂▁▁▁ |
| X5y_complaints | 0 | 1 | 6.60 | 5.93 | 0 | 2 | 5 | 9 | 46 | ▇▂▁▁▁ |
| X2y_complaints | 0 | 1 | 3.20 | 2.86 | 0 | 1 | 3 | 5 | 20 | ▇▂▁▁▁ |
| total_withOrders | 0 | 1 | 6.88 | 6.17 | 0 | 3 | 5 | 9 | 43 | ▇▂▁▁▁ |
| X5y_withOrders | 0 | 1 | 4.41 | 4.35 | 0 | 2 | 3 | 6 | 34 | ▇▂▁▁▁ |
| X2y_withOrders | 0 | 1 | 1.85 | 2.20 | 0 | 0 | 1 | 3 | 16 | ▇▁▁▁▁ |
# Full logistic regression: outbreak against every other column of
# data_selected. This is the starting model for the term-by-term backward
# elimination that follows. Argument order is kept as-is so the deparsed
# `Call:` shown in the summary is unchanged.
fit <- glm(outbreak ~ ., family = binomial, data = data_selected)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ ., family = binomial, data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4042 -0.9564 -0.6169 1.0565 1.9789
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.552933 0.267264 -5.810 6.23e-09 ***
## home_typeMunicipal -0.551167 0.270037 -2.041 0.0412 *
## home_typeNon-Profit 0.155759 0.216670 0.719 0.4722
## number_beds 0.011545 0.001887 6.119 9.42e-10 ***
## total_inspections -0.018206 0.019458 -0.936 0.3495
## X5y_inspections -0.036013 0.043604 -0.826 0.4089
## X2y_inspections 0.021829 0.062826 0.347 0.7282
## total_complaints 0.054710 0.032437 1.687 0.0917 .
## X5y_complaints 0.051926 0.073004 0.711 0.4769
## X2y_complaints -0.002413 0.093847 -0.026 0.9795
## total_withOrders -0.027825 0.039733 -0.700 0.4837
## X5y_withOrders 0.083603 0.075386 1.109 0.2674
## X2y_withOrders -0.024768 0.095839 -0.258 0.7961
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 730.55 on 602 degrees of freedom
## AIC: 756.55
##
## Number of Fisher Scoring iterations: 4
# Backward-elimination step: X2y_complaints is the least significant term in
# the full model (p ~ 0.98), so refit without it.
fit <- update(fit, . ~ . - X2y_complaints)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_inspections +
## X5y_inspections + X2y_inspections + total_complaints + X5y_complaints +
## total_withOrders + X5y_withOrders + X2y_withOrders, family = binomial,
## data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4046 -0.9577 -0.6165 1.0573 1.9777
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.553778 0.265262 -5.858 4.70e-09 ***
## home_typeMunicipal -0.550539 0.268922 -2.047 0.0406 *
## home_typeNon-Profit 0.156037 0.216405 0.721 0.4709
## number_beds 0.011540 0.001876 6.151 7.69e-10 ***
## total_inspections -0.018133 0.019246 -0.942 0.3461
## X5y_inspections -0.035507 0.038904 -0.913 0.3614
## X2y_inspections 0.020611 0.041268 0.499 0.6175
## total_complaints 0.054684 0.032418 1.687 0.0916 .
## X5y_complaints 0.050758 0.057118 0.889 0.3742
## total_withOrders -0.027902 0.039623 -0.704 0.4813
## X5y_withOrders 0.083342 0.074707 1.116 0.2646
## X2y_withOrders -0.023812 0.088341 -0.270 0.7875
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 730.55 on 603 degrees of freedom
## AIC: 754.55
##
## Number of Fisher Scoring iterations: 4
# Backward-elimination step: X2y_withOrders now has the largest p-value
# (p ~ 0.79 in the preceding fit), so refit without it.
fit <- update(fit, . ~ . - X2y_withOrders)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_inspections +
## X5y_inspections + X2y_inspections + total_complaints + X5y_complaints +
## total_withOrders + X5y_withOrders, family = binomial, data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4119 -0.9627 -0.6239 1.0494 1.9743
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.552962 0.265202 -5.856 4.75e-09 ***
## home_typeMunicipal -0.553511 0.268716 -2.060 0.0394 *
## home_typeNon-Profit 0.153588 0.216207 0.710 0.4775
## number_beds 0.011561 0.001874 6.169 6.88e-10 ***
## total_inspections -0.018469 0.019207 -0.962 0.3363
## X5y_inspections -0.031660 0.036187 -0.875 0.3816
## X2y_inspections 0.014203 0.033734 0.421 0.6737
## total_complaints 0.054512 0.032398 1.683 0.0925 .
## X5y_complaints 0.049712 0.056960 0.873 0.3828
## total_withOrders -0.025952 0.038914 -0.667 0.5048
## X5y_withOrders 0.070805 0.058454 1.211 0.2258
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 730.62 on 604 degrees of freedom
## AIC: 752.62
##
## Number of Fisher Scoring iterations: 4
# Backward-elimination step: X2y_inspections now has the largest p-value
# (p ~ 0.67 in the preceding fit), so refit without it.
fit <- update(fit, . ~ . - X2y_inspections)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_inspections +
## X5y_inspections + total_complaints + X5y_complaints + total_withOrders +
## X5y_withOrders, family = binomial, data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4284 -0.9502 -0.6204 1.0552 1.9600
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.550158 0.265675 -5.835 5.39e-09 ***
## home_typeMunicipal -0.551389 0.268796 -2.051 0.0402 *
## home_typeNon-Profit 0.156706 0.215950 0.726 0.4680
## number_beds 0.011715 0.001842 6.359 2.04e-10 ***
## total_inspections -0.018734 0.019214 -0.975 0.3295
## X5y_inspections -0.025634 0.033244 -0.771 0.4407
## total_complaints 0.054910 0.032392 1.695 0.0900 .
## X5y_complaints 0.047847 0.056775 0.843 0.3994
## total_withOrders -0.027152 0.038859 -0.699 0.4847
## X5y_withOrders 0.073765 0.058112 1.269 0.2043
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 730.80 on 605 degrees of freedom
## AIC: 750.8
##
## Number of Fisher Scoring iterations: 4
# Backward-elimination step: total_withOrders now has the largest p-value
# (p ~ 0.48 in the preceding fit), so refit without it.
fit <- update(fit, . ~ . - total_withOrders)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_inspections +
## X5y_inspections + total_complaints + X5y_complaints + X5y_withOrders,
## family = binomial, data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5331 -0.9615 -0.6241 1.0680 1.9709
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.546460 0.266125 -5.811 6.21e-09 ***
## home_typeMunicipal -0.546342 0.268538 -2.035 0.0419 *
## home_typeNon-Profit 0.145870 0.215560 0.677 0.4986
## number_beds 0.011721 0.001843 6.358 2.04e-10 ***
## total_inspections -0.023973 0.017653 -1.358 0.1745
## X5y_inspections -0.019374 0.032015 -0.605 0.5451
## total_complaints 0.057345 0.032101 1.786 0.0740 .
## X5y_complaints 0.047486 0.056712 0.837 0.4024
## X5y_withOrders 0.040192 0.032489 1.237 0.2160
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 731.29 on 606 degrees of freedom
## AIC: 749.29
##
## Number of Fisher Scoring iterations: 4
# Backward-elimination step: X5y_inspections now has the largest p-value
# (p ~ 0.55 in the preceding fit), so refit without it.
fit <- update(fit, . ~ . - X5y_inspections)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_inspections +
## total_complaints + X5y_complaints + X5y_withOrders, family = binomial,
## data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5263 -0.9616 -0.6166 1.0543 1.9948
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.58272 0.25879 -6.116 9.61e-10 ***
## home_typeMunicipal -0.54990 0.26865 -2.047 0.04067 *
## home_typeNon-Profit 0.14367 0.21535 0.667 0.50467
## number_beds 0.01168 0.00184 6.348 2.17e-10 ***
## total_inspections -0.03167 0.01227 -2.581 0.00986 **
## total_complaints 0.06935 0.02538 2.732 0.00629 **
## X5y_complaints 0.01949 0.03270 0.596 0.55112
## X5y_withOrders 0.03018 0.02789 1.082 0.27932
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 731.66 on 607 degrees of freedom
## AIC: 747.66
##
## Number of Fisher Scoring iterations: 4
# Backward-elimination step: X5y_complaints now has the largest p-value
# (p ~ 0.55 in the preceding fit), so refit without it.
fit <- update(fit, . ~ . - X5y_complaints)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_inspections +
## total_complaints + X5y_withOrders, family = binomial, data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5648 -0.9623 -0.6222 1.0539 1.9893
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.600855 0.256928 -6.231 4.64e-10 ***
## home_typeMunicipal -0.541607 0.267981 -2.021 0.043273 *
## home_typeNon-Profit 0.146247 0.215209 0.680 0.496784
## number_beds 0.011743 0.001836 6.397 1.58e-10 ***
## total_inspections -0.030982 0.012173 -2.545 0.010926 *
## total_complaints 0.076189 0.022745 3.350 0.000809 ***
## X5y_withOrders 0.032378 0.027472 1.179 0.238560
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 732.01 on 608 degrees of freedom
## AIC: 746.01
##
## Number of Fisher Scoring iterations: 4
# Final backward-elimination step: X5y_withOrders has the largest p-value
# among the numeric predictors in the preceding fit (p ~ 0.24); refit without
# it, leaving home_type, number_beds, total_inspections and total_complaints.
fit <- update(fit, . ~ . - X5y_withOrders)
summary(fit)
##
## Call:
## glm(formula = outbreak ~ home_type + number_beds + total_inspections +
## total_complaints, family = binomial, data = data_selected)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5854 -0.9661 -0.6290 1.0365 1.9760
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.60829 0.25716 -6.254 4.00e-10 ***
## home_typeMunicipal -0.56603 0.26722 -2.118 0.03416 *
## home_typeNon-Profit 0.16650 0.21424 0.777 0.43707
## number_beds 0.01174 0.00183 6.415 1.41e-10 ***
## total_inspections -0.02500 0.01113 -2.246 0.02468 *
## total_complaints 0.07084 0.02235 3.170 0.00153 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 851.01 on 614 degrees of freedom
## Residual deviance: 733.42 on 609 degrees of freedom
## AIC: 745.42
##
## Number of Fisher Scoring iterations: 4